1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{convert::Infallible, fmt, str::FromStr};
7
/// Archive (container) formats this crate can name.
///
/// The default value is [`ArchiveFormat::Unknown`]. The derived ordering follows
/// declaration order, so reordering variants changes comparison results.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ArchiveFormat {
    /// Tar archive (label `tar`).
    Tar,
    /// Zip archive (label `zip`).
    Zip,
    /// 7z archive (label `7z`).
    SevenZip,
    /// Cpio archive (label `cpio`).
    Cpio,
    /// `ar` archive (label `ar`).
    Ar,
    /// ISO image (label `iso`).
    Iso,
    /// Rar archive (label `rar`).
    Rar,
    /// Cabinet archive (label `cab`).
    Cab,
    /// WARC archive (label `warc`).
    Warc,
    /// Mtree manifest (label `mtree`).
    Mtree,
    /// No recognised archive format; this is the default.
    #[default]
    Unknown,
}
35
36impl ArchiveFormat {
37 #[must_use]
39 pub const fn as_str(self) -> &'static str {
40 match self {
41 Self::Tar => "tar",
42 Self::Zip => "zip",
43 Self::SevenZip => "7z",
44 Self::Cpio => "cpio",
45 Self::Ar => "ar",
46 Self::Iso => "iso",
47 Self::Rar => "rar",
48 Self::Cab => "cab",
49 Self::Warc => "warc",
50 Self::Mtree => "mtree",
51 Self::Unknown => "unknown",
52 }
53 }
54
55 #[must_use]
57 pub const fn extension(self) -> Option<&'static str> {
58 match self {
59 Self::Tar => Some("tar"),
60 Self::Zip => Some("zip"),
61 Self::SevenZip => Some("7z"),
62 Self::Cpio => Some("cpio"),
63 Self::Ar => Some("ar"),
64 Self::Iso => Some("iso"),
65 Self::Rar => Some("rar"),
66 Self::Cab => Some("cab"),
67 Self::Warc => Some("warc"),
68 Self::Mtree => Some("mtree"),
69 Self::Unknown => None,
70 }
71 }
72
73 #[must_use]
75 pub fn from_label(input: &str) -> Self {
76 match input.trim().to_ascii_lowercase().as_str() {
77 "tar" => Self::Tar,
78 "zip" => Self::Zip,
79 "7z" | "sevenzip" | "seven-zip" => Self::SevenZip,
80 "cpio" => Self::Cpio,
81 "ar" => Self::Ar,
82 "iso" | "iso9660" | "iso-9660" => Self::Iso,
83 "rar" | "rar4" | "rar5" => Self::Rar,
84 "cab" | "cabinet" => Self::Cab,
85 "warc" | "arc" | "web-archive" => Self::Warc,
86 "mtree" => Self::Mtree,
87 _ => Self::Unknown,
88 }
89 }
90
91 #[must_use]
93 pub fn from_extension(input: &str) -> Self {
94 ArchiveEncoding::from_extension(input).archive
95 }
96
97 #[must_use]
99 pub const fn is_known(self) -> bool {
100 !matches!(self, Self::Unknown)
101 }
102}
103
104impl fmt::Display for ArchiveFormat {
105 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
106 formatter.write_str(self.as_str())
107 }
108}
109
110impl FromStr for ArchiveFormat {
111 type Err = Infallible;
112
113 fn from_str(input: &str) -> Result<Self, Self::Err> {
114 Ok(Self::from_label(input))
115 }
116}
117
/// Compression layers this crate can name.
///
/// The default value is [`CompressionFormat::None`]. The derived ordering
/// follows declaration order, so reordering variants changes comparison results.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CompressionFormat {
    /// No compression layer; this is the default.
    #[default]
    None,
    /// Gzip (label `gzip`).
    Gzip,
    /// Bzip2 (label `bzip2`).
    Bzip2,
    /// Xz (label `xz`).
    Xz,
    /// Zstandard (label `zstd`).
    Zstd,
    /// Brotli (label `brotli`).
    Brotli,
    /// LZ4 (label `lz4`).
    Lz4,
    /// The compression layer could not be determined.
    Unknown,
}
139
140impl CompressionFormat {
141 #[must_use]
143 pub const fn as_str(self) -> &'static str {
144 match self {
145 Self::None => "none",
146 Self::Gzip => "gzip",
147 Self::Bzip2 => "bzip2",
148 Self::Xz => "xz",
149 Self::Zstd => "zstd",
150 Self::Brotli => "brotli",
151 Self::Lz4 => "lz4",
152 Self::Unknown => "unknown",
153 }
154 }
155
156 #[must_use]
158 pub const fn extension(self) -> Option<&'static str> {
159 match self {
160 Self::Gzip => Some("gz"),
161 Self::Bzip2 => Some("bz2"),
162 Self::Xz => Some("xz"),
163 Self::Zstd => Some("zst"),
164 Self::Brotli => Some("br"),
165 Self::Lz4 => Some("lz4"),
166 Self::None | Self::Unknown => None,
167 }
168 }
169
170 #[must_use]
172 pub fn from_label(input: &str) -> Self {
173 match input.trim().to_ascii_lowercase().as_str() {
174 "" | "none" | "stored" => Self::None,
175 "gz" | "gzip" => Self::Gzip,
176 "bz2" | "bzip2" => Self::Bzip2,
177 "xz" | "lzma" => Self::Xz,
178 "zst" | "zstd" | "zstandard" => Self::Zstd,
179 "br" | "brotli" => Self::Brotli,
180 "lz4" => Self::Lz4,
181 _ => Self::Unknown,
182 }
183 }
184
185 #[must_use]
187 pub fn from_extension(input: &str) -> Self {
188 ArchiveEncoding::from_extension(input).compression
189 }
190
191 #[must_use]
193 pub const fn is_compressed(self) -> bool {
194 !matches!(self, Self::None | Self::Unknown)
195 }
196}
197
198impl fmt::Display for CompressionFormat {
199 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
200 formatter.write_str(self.as_str())
201 }
202}
203
204impl FromStr for CompressionFormat {
205 type Err = Infallible;
206
207 fn from_str(input: &str) -> Result<Self, Self::Err> {
208 Ok(Self::from_label(input))
209 }
210}
211
212#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
214pub struct ArchiveEncoding {
215 pub archive: ArchiveFormat,
217 pub compression: CompressionFormat,
219}
220
221impl ArchiveEncoding {
222 #[must_use]
224 pub const fn new(archive: ArchiveFormat, compression: CompressionFormat) -> Self {
225 Self {
226 archive,
227 compression,
228 }
229 }
230
231 #[must_use]
233 pub const fn unknown() -> Self {
234 Self::new(ArchiveFormat::Unknown, CompressionFormat::Unknown)
235 }
236
237 #[must_use]
239 pub fn from_extension(input: &str) -> Self {
240 let normalized = input.trim().to_ascii_lowercase();
241 let leaf = normalized
242 .rsplit(['/', '\\'])
243 .next()
244 .unwrap_or(normalized.as_str());
245 let extension_like = leaf.trim_start_matches('.');
246 let parts = extension_like
247 .split('.')
248 .filter(|part| !part.is_empty())
249 .collect::<Vec<_>>();
250
251 let Some(last) = parts.last().copied() else {
252 return Self::unknown();
253 };
254
255 if Self::is_seven_zip_volume(&parts) {
256 return Self::new(ArchiveFormat::SevenZip, CompressionFormat::None);
257 }
258
259 if Self::is_rar_part(&parts) {
260 return Self::new(ArchiveFormat::Rar, CompressionFormat::None);
261 }
262
263 if let Some(previous) = parts
264 .len()
265 .checked_sub(2)
266 .and_then(|index| parts.get(index))
267 .copied()
268 && let Some(encoding) = Self::from_combined_parts(previous, last)
269 {
270 return encoding;
271 }
272
273 Self::from_single_extension(last)
274 }
275
276 fn from_combined_parts(previous: &str, last: &str) -> Option<Self> {
277 let compression = Self::compression_from_extension_part(last)?;
278 let archive = match previous {
279 "tar" => ArchiveFormat::Tar,
280 "cpio" => ArchiveFormat::Cpio,
281 "warc" | "arc" => ArchiveFormat::Warc,
282 "mtree" => ArchiveFormat::Mtree,
283 _ => return None,
284 };
285
286 Some(Self::new(archive, compression))
287 }
288
289 fn compression_from_extension_part(extension: &str) -> Option<CompressionFormat> {
290 match extension {
291 "gz" | "gzip" => Some(CompressionFormat::Gzip),
292 "bz2" | "bzip2" => Some(CompressionFormat::Bzip2),
293 "xz" | "lzma" => Some(CompressionFormat::Xz),
294 "zst" | "zstd" => Some(CompressionFormat::Zstd),
295 "br" | "brotli" => Some(CompressionFormat::Brotli),
296 "lz4" => Some(CompressionFormat::Lz4),
297 _ => None,
298 }
299 }
300
301 fn is_seven_zip_volume(parts: &[&str]) -> bool {
302 let Some(last) = parts.last().copied() else {
303 return false;
304 };
305 let Some(previous) = parts
306 .len()
307 .checked_sub(2)
308 .and_then(|index| parts.get(index))
309 .copied()
310 else {
311 return false;
312 };
313
314 previous == "7z" && is_three_digit_part(last)
315 }
316
317 fn is_rar_part(parts: &[&str]) -> bool {
318 let Some(last) = parts.last().copied() else {
319 return false;
320 };
321
322 if is_rar_old_part(last) {
323 return true;
324 }
325
326 let Some(previous) = parts
327 .len()
328 .checked_sub(2)
329 .and_then(|index| parts.get(index))
330 .copied()
331 else {
332 return false;
333 };
334
335 last == "rar" && is_part_label(previous)
336 }
337
338 fn from_single_extension(extension: &str) -> Self {
339 match extension {
340 "tgz" => Self::new(ArchiveFormat::Tar, CompressionFormat::Gzip),
341 "tbz" | "tbz2" => Self::new(ArchiveFormat::Tar, CompressionFormat::Bzip2),
342 "txz" | "tlz" => Self::new(ArchiveFormat::Tar, CompressionFormat::Xz),
343 "tzst" => Self::new(ArchiveFormat::Tar, CompressionFormat::Zstd),
344 "tbr" => Self::new(ArchiveFormat::Tar, CompressionFormat::Brotli),
345 "tar" => Self::new(ArchiveFormat::Tar, CompressionFormat::None),
346 "zip" => Self::new(ArchiveFormat::Zip, CompressionFormat::None),
347 "7z" => Self::new(ArchiveFormat::SevenZip, CompressionFormat::None),
348 "cpio" => Self::new(ArchiveFormat::Cpio, CompressionFormat::None),
349 "a" | "ar" | "deb" => Self::new(ArchiveFormat::Ar, CompressionFormat::None),
350 "iso" | "img" => Self::new(ArchiveFormat::Iso, CompressionFormat::None),
351 "rar" => Self::new(ArchiveFormat::Rar, CompressionFormat::None),
352 "cab" => Self::new(ArchiveFormat::Cab, CompressionFormat::None),
353 "warc" | "arc" => Self::new(ArchiveFormat::Warc, CompressionFormat::None),
354 "mtree" => Self::new(ArchiveFormat::Mtree, CompressionFormat::None),
355 "gz" | "gzip" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Gzip),
356 "bz2" | "bzip2" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Bzip2),
357 "xz" | "lzma" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Xz),
358 "zst" | "zstd" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Zstd),
359 "br" | "brotli" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Brotli),
360 "lz4" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Lz4),
361 _ => Self::unknown(),
362 }
363 }
364
365 #[must_use]
367 pub fn from_filename(input: &str) -> Self {
368 Self::from_extension(input)
369 }
370
371 #[must_use]
373 pub const fn has_archive(self) -> bool {
374 self.archive.is_known()
375 }
376
377 #[must_use]
379 pub const fn is_compressed(self) -> bool {
380 self.compression.is_compressed()
381 }
382}
383
/// Returns `true` when `part` is exactly three ASCII digits (for example `001`).
fn is_three_digit_part(part: &str) -> bool {
    matches!(
        part.as_bytes(),
        [first, second, third]
            if first.is_ascii_digit() && second.is_ascii_digit() && third.is_ascii_digit()
    )
}
387
/// Returns `true` when `part` is a lowercase `r` followed by exactly two ASCII
/// digits (for example `r00`).
fn is_rar_old_part(part: &str) -> bool {
    matches!(
        part.as_bytes(),
        [b'r', tens, ones] if tens.is_ascii_digit() && ones.is_ascii_digit()
    )
}
392
/// Returns `true` when `part` is `part` followed by one or more ASCII digits
/// (for example `part1` or `part01`).
fn is_part_label(part: &str) -> bool {
    match part.strip_prefix("part") {
        Some(digits) if !digits.is_empty() => digits.bytes().all(|byte| byte.is_ascii_digit()),
        _ => false,
    }
}
400
401impl fmt::Display for ArchiveEncoding {
402 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
403 if self.compression == CompressionFormat::None {
404 formatter.write_str(self.archive.as_str())
405 } else {
406 write!(formatter, "{}+{}", self.archive, self.compression)
407 }
408 }
409}
410
#[cfg(test)]
mod tests {
    use super::{ArchiveEncoding, ArchiveFormat, CompressionFormat};

    /// Common file names resolve to the expected archive/compression pair.
    #[test]
    fn detects_common_archive_encodings() {
        let cases = [
            ("release.tar.zst", ArchiveFormat::Tar, CompressionFormat::Zstd),
            ("bundle.tgz", ArchiveFormat::Tar, CompressionFormat::Gzip),
            ("assets.zip", ArchiveFormat::Zip, CompressionFormat::None),
            ("initramfs.cpio.gz", ArchiveFormat::Cpio, CompressionFormat::Gzip),
            ("crawl.warc.gz", ArchiveFormat::Warc, CompressionFormat::Gzip),
            ("manifest.mtree.gz", ArchiveFormat::Mtree, CompressionFormat::Gzip),
        ];

        for &(name, archive, compression) in &cases {
            let expected = ArchiveEncoding::new(archive, compression);
            assert_eq!(
                ArchiveEncoding::from_extension(name),
                expected,
                "input: {}",
                name
            );
        }
    }

    /// The per-format `from_extension` helpers pick the right half of the encoding.
    #[test]
    fn detects_extension_labels() {
        let archive_cases = [
            (".tar", ArchiveFormat::Tar),
            ("libexample.a", ArchiveFormat::Ar),
            ("installer.img", ArchiveFormat::Iso),
            ("bundle.7z.001", ArchiveFormat::SevenZip),
            ("backup.part1.rar", ArchiveFormat::Rar),
            ("driver.cab", ArchiveFormat::Cab),
            ("crawl.arc.gz", ArchiveFormat::Warc),
        ];

        for &(name, expected) in &archive_cases {
            assert_eq!(
                ArchiveFormat::from_extension(name),
                expected,
                "input: {}",
                name
            );
        }

        assert_eq!(
            CompressionFormat::from_extension("xz"),
            CompressionFormat::Xz
        );
    }

    /// An unrecognised extension leaves both halves `Unknown`.
    #[test]
    fn preserves_unknown_archive_and_compression() {
        let ArchiveEncoding {
            archive,
            compression,
        } = ArchiveEncoding::from_filename("notes.txt");

        assert_eq!(archive, ArchiveFormat::Unknown);
        assert_eq!(compression, CompressionFormat::Unknown);
    }
}