// use_archive_format/lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4//! Archive and compression format labels for `RustUse`.
5
6use core::{convert::Infallible, fmt, str::FromStr};
7
/// Container formats that bundle files into a single archive.
///
/// The derived ordering follows the declaration order of the variants, and
/// the derived `Default` is [`ArchiveFormat::Unknown`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ArchiveFormat {
    /// POSIX `tar` archive.
    Tar,
    /// ZIP archive.
    Zip,
    /// 7z archive.
    SevenZip,
    /// CPIO archive.
    Cpio,
    /// Unix `ar` archive.
    Ar,
    /// ISO image.
    Iso,
    /// RAR archive.
    Rar,
    /// Microsoft Cabinet archive.
    Cab,
    /// WARC or ARC web archive.
    Warc,
    /// mtree manifest.
    Mtree,
    /// Format that is not recognised, or that was deliberately left unspecified.
    #[default]
    Unknown,
}
35
36impl ArchiveFormat {
37    /// Returns a stable lowercase label.
38    #[must_use]
39    pub const fn as_str(self) -> &'static str {
40        match self {
41            Self::Tar => "tar",
42            Self::Zip => "zip",
43            Self::SevenZip => "7z",
44            Self::Cpio => "cpio",
45            Self::Ar => "ar",
46            Self::Iso => "iso",
47            Self::Rar => "rar",
48            Self::Cab => "cab",
49            Self::Warc => "warc",
50            Self::Mtree => "mtree",
51            Self::Unknown => "unknown",
52        }
53    }
54
55    /// Returns the most common file extension for this archive format.
56    #[must_use]
57    pub const fn extension(self) -> Option<&'static str> {
58        match self {
59            Self::Tar => Some("tar"),
60            Self::Zip => Some("zip"),
61            Self::SevenZip => Some("7z"),
62            Self::Cpio => Some("cpio"),
63            Self::Ar => Some("ar"),
64            Self::Iso => Some("iso"),
65            Self::Rar => Some("rar"),
66            Self::Cab => Some("cab"),
67            Self::Warc => Some("warc"),
68            Self::Mtree => Some("mtree"),
69            Self::Unknown => None,
70        }
71    }
72
73    /// Parses a stable format label.
74    #[must_use]
75    pub fn from_label(input: &str) -> Self {
76        match input.trim().to_ascii_lowercase().as_str() {
77            "tar" => Self::Tar,
78            "zip" => Self::Zip,
79            "7z" | "sevenzip" | "seven-zip" => Self::SevenZip,
80            "cpio" => Self::Cpio,
81            "ar" => Self::Ar,
82            "iso" | "iso9660" | "iso-9660" => Self::Iso,
83            "rar" | "rar4" | "rar5" => Self::Rar,
84            "cab" | "cabinet" => Self::Cab,
85            "warc" | "arc" | "web-archive" => Self::Warc,
86            "mtree" => Self::Mtree,
87            _ => Self::Unknown,
88        }
89    }
90
91    /// Detects an archive format from a filename or extension.
92    #[must_use]
93    pub fn from_extension(input: &str) -> Self {
94        ArchiveEncoding::from_extension(input).archive
95    }
96
97    /// Returns whether this is a known archive container format.
98    #[must_use]
99    pub const fn is_known(self) -> bool {
100        !matches!(self, Self::Unknown)
101    }
102}
103
104impl fmt::Display for ArchiveFormat {
105    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
106        formatter.write_str(self.as_str())
107    }
108}
109
110impl FromStr for ArchiveFormat {
111    type Err = Infallible;
112
113    fn from_str(input: &str) -> Result<Self, Self::Err> {
114        Ok(Self::from_label(input))
115    }
116}
117
/// Compression codecs that are commonly wrapped around an archive payload.
///
/// The derived ordering follows the declaration order of the variants, and
/// the derived `Default` is [`CompressionFormat::None`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CompressionFormat {
    /// The payload has no outer compression wrapper.
    #[default]
    None,
    /// Gzip compression.
    Gzip,
    /// Bzip2 compression.
    Bzip2,
    /// XZ or LZMA compression.
    Xz,
    /// Zstandard compression.
    Zstd,
    /// Brotli compression.
    Brotli,
    /// LZ4 compression.
    Lz4,
    /// Codec that is not recognised, or that was deliberately left unspecified.
    Unknown,
}
139
140impl CompressionFormat {
141    /// Returns a stable lowercase label.
142    #[must_use]
143    pub const fn as_str(self) -> &'static str {
144        match self {
145            Self::None => "none",
146            Self::Gzip => "gzip",
147            Self::Bzip2 => "bzip2",
148            Self::Xz => "xz",
149            Self::Zstd => "zstd",
150            Self::Brotli => "brotli",
151            Self::Lz4 => "lz4",
152            Self::Unknown => "unknown",
153        }
154    }
155
156    /// Returns the most common file extension for this compression format.
157    #[must_use]
158    pub const fn extension(self) -> Option<&'static str> {
159        match self {
160            Self::Gzip => Some("gz"),
161            Self::Bzip2 => Some("bz2"),
162            Self::Xz => Some("xz"),
163            Self::Zstd => Some("zst"),
164            Self::Brotli => Some("br"),
165            Self::Lz4 => Some("lz4"),
166            Self::None | Self::Unknown => None,
167        }
168    }
169
170    /// Parses a stable compression label.
171    #[must_use]
172    pub fn from_label(input: &str) -> Self {
173        match input.trim().to_ascii_lowercase().as_str() {
174            "" | "none" | "stored" => Self::None,
175            "gz" | "gzip" => Self::Gzip,
176            "bz2" | "bzip2" => Self::Bzip2,
177            "xz" | "lzma" => Self::Xz,
178            "zst" | "zstd" | "zstandard" => Self::Zstd,
179            "br" | "brotli" => Self::Brotli,
180            "lz4" => Self::Lz4,
181            _ => Self::Unknown,
182        }
183    }
184
185    /// Detects a compression format from a filename or extension.
186    #[must_use]
187    pub fn from_extension(input: &str) -> Self {
188        ArchiveEncoding::from_extension(input).compression
189    }
190
191    /// Returns whether this is an actual compression wrapper.
192    #[must_use]
193    pub const fn is_compressed(self) -> bool {
194        !matches!(self, Self::None | Self::Unknown)
195    }
196}
197
198impl fmt::Display for CompressionFormat {
199    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
200        formatter.write_str(self.as_str())
201    }
202}
203
204impl FromStr for CompressionFormat {
205    type Err = Infallible;
206
207    fn from_str(input: &str) -> Result<Self, Self::Err> {
208        Ok(Self::from_label(input))
209    }
210}
211
212/// Archive container plus outer compression wrapper.
213#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
214pub struct ArchiveEncoding {
215    /// Archive container format.
216    pub archive: ArchiveFormat,
217    /// Outer compression format, if any.
218    pub compression: CompressionFormat,
219}
220
221impl ArchiveEncoding {
222    /// Creates an archive encoding from explicit parts.
223    #[must_use]
224    pub const fn new(archive: ArchiveFormat, compression: CompressionFormat) -> Self {
225        Self {
226            archive,
227            compression,
228        }
229    }
230
231    /// Returns an unknown archive encoding.
232    #[must_use]
233    pub const fn unknown() -> Self {
234        Self::new(ArchiveFormat::Unknown, CompressionFormat::Unknown)
235    }
236
237    /// Detects an archive encoding from a filename or extension.
238    #[must_use]
239    pub fn from_extension(input: &str) -> Self {
240        let normalized = input.trim().to_ascii_lowercase();
241        let leaf = normalized
242            .rsplit(['/', '\\'])
243            .next()
244            .unwrap_or(normalized.as_str());
245        let extension_like = leaf.trim_start_matches('.');
246        let parts = extension_like
247            .split('.')
248            .filter(|part| !part.is_empty())
249            .collect::<Vec<_>>();
250
251        let Some(last) = parts.last().copied() else {
252            return Self::unknown();
253        };
254
255        if Self::is_seven_zip_volume(&parts) {
256            return Self::new(ArchiveFormat::SevenZip, CompressionFormat::None);
257        }
258
259        if Self::is_rar_part(&parts) {
260            return Self::new(ArchiveFormat::Rar, CompressionFormat::None);
261        }
262
263        if let Some(previous) = parts
264            .len()
265            .checked_sub(2)
266            .and_then(|index| parts.get(index))
267            .copied()
268            && let Some(encoding) = Self::from_combined_parts(previous, last)
269        {
270            return encoding;
271        }
272
273        Self::from_single_extension(last)
274    }
275
276    fn from_combined_parts(previous: &str, last: &str) -> Option<Self> {
277        let compression = Self::compression_from_extension_part(last)?;
278        let archive = match previous {
279            "tar" => ArchiveFormat::Tar,
280            "cpio" => ArchiveFormat::Cpio,
281            "warc" | "arc" => ArchiveFormat::Warc,
282            "mtree" => ArchiveFormat::Mtree,
283            _ => return None,
284        };
285
286        Some(Self::new(archive, compression))
287    }
288
289    fn compression_from_extension_part(extension: &str) -> Option<CompressionFormat> {
290        match extension {
291            "gz" | "gzip" => Some(CompressionFormat::Gzip),
292            "bz2" | "bzip2" => Some(CompressionFormat::Bzip2),
293            "xz" | "lzma" => Some(CompressionFormat::Xz),
294            "zst" | "zstd" => Some(CompressionFormat::Zstd),
295            "br" | "brotli" => Some(CompressionFormat::Brotli),
296            "lz4" => Some(CompressionFormat::Lz4),
297            _ => None,
298        }
299    }
300
301    fn is_seven_zip_volume(parts: &[&str]) -> bool {
302        let Some(last) = parts.last().copied() else {
303            return false;
304        };
305        let Some(previous) = parts
306            .len()
307            .checked_sub(2)
308            .and_then(|index| parts.get(index))
309            .copied()
310        else {
311            return false;
312        };
313
314        previous == "7z" && is_three_digit_part(last)
315    }
316
317    fn is_rar_part(parts: &[&str]) -> bool {
318        let Some(last) = parts.last().copied() else {
319            return false;
320        };
321
322        if is_rar_old_part(last) {
323            return true;
324        }
325
326        let Some(previous) = parts
327            .len()
328            .checked_sub(2)
329            .and_then(|index| parts.get(index))
330            .copied()
331        else {
332            return false;
333        };
334
335        last == "rar" && is_part_label(previous)
336    }
337
338    fn from_single_extension(extension: &str) -> Self {
339        match extension {
340            "tgz" => Self::new(ArchiveFormat::Tar, CompressionFormat::Gzip),
341            "tbz" | "tbz2" => Self::new(ArchiveFormat::Tar, CompressionFormat::Bzip2),
342            "txz" | "tlz" => Self::new(ArchiveFormat::Tar, CompressionFormat::Xz),
343            "tzst" => Self::new(ArchiveFormat::Tar, CompressionFormat::Zstd),
344            "tbr" => Self::new(ArchiveFormat::Tar, CompressionFormat::Brotli),
345            "tar" => Self::new(ArchiveFormat::Tar, CompressionFormat::None),
346            "zip" => Self::new(ArchiveFormat::Zip, CompressionFormat::None),
347            "7z" => Self::new(ArchiveFormat::SevenZip, CompressionFormat::None),
348            "cpio" => Self::new(ArchiveFormat::Cpio, CompressionFormat::None),
349            "a" | "ar" | "deb" => Self::new(ArchiveFormat::Ar, CompressionFormat::None),
350            "iso" | "img" => Self::new(ArchiveFormat::Iso, CompressionFormat::None),
351            "rar" => Self::new(ArchiveFormat::Rar, CompressionFormat::None),
352            "cab" => Self::new(ArchiveFormat::Cab, CompressionFormat::None),
353            "warc" | "arc" => Self::new(ArchiveFormat::Warc, CompressionFormat::None),
354            "mtree" => Self::new(ArchiveFormat::Mtree, CompressionFormat::None),
355            "gz" | "gzip" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Gzip),
356            "bz2" | "bzip2" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Bzip2),
357            "xz" | "lzma" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Xz),
358            "zst" | "zstd" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Zstd),
359            "br" | "brotli" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Brotli),
360            "lz4" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Lz4),
361            _ => Self::unknown(),
362        }
363    }
364
365    /// Detects an archive encoding from a filename.
366    #[must_use]
367    pub fn from_filename(input: &str) -> Self {
368        Self::from_extension(input)
369    }
370
371    /// Returns whether the archive container is known.
372    #[must_use]
373    pub const fn has_archive(self) -> bool {
374        self.archive.is_known()
375    }
376
377    /// Returns whether an outer compression wrapper is known.
378    #[must_use]
379    pub const fn is_compressed(self) -> bool {
380        self.compression.is_compressed()
381    }
382}
383
/// Returns whether `part` is exactly three ASCII digits, e.g. `001`.
fn is_three_digit_part(part: &str) -> bool {
    matches!(
        part.as_bytes(),
        [a, b, c] if a.is_ascii_digit() && b.is_ascii_digit() && c.is_ascii_digit()
    )
}
387
/// Returns whether `part` is an old-style RAR volume suffix: a lowercase `r`
/// followed by exactly two ASCII digits, e.g. `r00`.
fn is_rar_old_part(part: &str) -> bool {
    matches!(
        part.as_bytes(),
        [b'r', tens, units] if tens.is_ascii_digit() && units.is_ascii_digit()
    )
}
392
/// Returns whether `part` is `part` followed by one or more ASCII digits,
/// e.g. `part1` or `part02`.
fn is_part_label(part: &str) -> bool {
    match part.strip_prefix("part") {
        Some(digits) if !digits.is_empty() => digits.bytes().all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
400
401impl fmt::Display for ArchiveEncoding {
402    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
403        if self.compression == CompressionFormat::None {
404            formatter.write_str(self.archive.as_str())
405        } else {
406            write!(formatter, "{}+{}", self.archive, self.compression)
407        }
408    }
409}
410
#[cfg(test)]
mod tests {
    use super::{ArchiveEncoding, ArchiveFormat, CompressionFormat};

    /// Compound and shorthand suffixes resolve to the expected
    /// container/compression pair.
    #[test]
    fn detects_common_archive_encodings() {
        let cases = [
            ("release.tar.zst", ArchiveFormat::Tar, CompressionFormat::Zstd),
            ("bundle.tgz", ArchiveFormat::Tar, CompressionFormat::Gzip),
            ("assets.zip", ArchiveFormat::Zip, CompressionFormat::None),
            ("initramfs.cpio.gz", ArchiveFormat::Cpio, CompressionFormat::Gzip),
            ("crawl.warc.gz", ArchiveFormat::Warc, CompressionFormat::Gzip),
            ("manifest.mtree.gz", ArchiveFormat::Mtree, CompressionFormat::Gzip),
        ];

        for (name, archive, compression) in cases {
            assert_eq!(
                ArchiveEncoding::from_extension(name),
                ArchiveEncoding::new(archive, compression),
                "unexpected encoding for {name:?}"
            );
        }
    }

    /// The per-enum `from_extension` helpers expose the matching half of the
    /// detected encoding.
    #[test]
    fn detects_extension_labels() {
        let archives = [
            (".tar", ArchiveFormat::Tar),
            ("libexample.a", ArchiveFormat::Ar),
            ("installer.img", ArchiveFormat::Iso),
            ("bundle.7z.001", ArchiveFormat::SevenZip),
            ("backup.part1.rar", ArchiveFormat::Rar),
            ("driver.cab", ArchiveFormat::Cab),
            ("crawl.arc.gz", ArchiveFormat::Warc),
        ];

        for (name, expected) in archives {
            assert_eq!(
                ArchiveFormat::from_extension(name),
                expected,
                "unexpected archive format for {name:?}"
            );
        }

        assert_eq!(
            CompressionFormat::from_extension("xz"),
            CompressionFormat::Xz
        );
    }

    /// An unrecognised extension reports `Unknown` for both halves.
    #[test]
    fn preserves_unknown_archive_and_compression() {
        let ArchiveEncoding {
            archive,
            compression,
        } = ArchiveEncoding::from_filename("notes.txt");

        assert_eq!(archive, ArchiveFormat::Unknown);
        assert_eq!(compression, CompressionFormat::Unknown);
    }
}