Skip to main content

subx_cli/core/archive/
mod.rs

1//! Archive extraction support for SubX.
2//!
3//! Provides transparent extraction of archive files supplied as direct `-i`
4//! inputs. Archives discovered during directory traversal are NOT extracted.
5//!
6//! # Module Structure
7//!
8//! Each supported format lives in its own sub-module, while shared security
9//! validation (path-traversal checks, decompression-bomb limits) is
10//! centralised in the `common` module.
11//!
12//! - `common` — Shared validation: `validate_entry_path`,
13//!   `ExtractionLimits`, size/count constants.
14//! - `zip` — ZIP extraction (always available, pure Rust).
15//! - `rar` — RAR extraction (feature-gated `archive-rar`).
16//! - `sevenz` — 7-Zip extraction (always available, pure Rust).
17//! - `targz` — Tar-gzip extraction (always available, pure Rust).
18//!
19//! # Supported Formats
20//!
21//! | Extension(s)         | Module   | Crate(s)                | Feature gate |
22//! |----------------------|----------|-------------------------|--------------|
23//! | `.zip`               | `zip`    | `zip`                   | always-on    |
24//! | `.rar`               | `rar`    | `unrar` / `unrar_sys`   | `archive-rar`|
25//! | `.7z`                | `sevenz` | `sevenz-rust`           | always-on    |
26//! | `.tar.gz` / `.tgz`   | `targz`  | `tar` + `flate2`        | always-on    |
27//!
28//! # Security
29//!
30//! All extraction operations enforce:
31//! - Path traversal prevention (zip-slip)
32//! - Symlink and hardlink rejection
33//! - Decompression bomb protection (1 GiB size limit, 10,000 entry limit)
34
35mod common;
36mod rar;
37mod sevenz;
38mod targz;
39mod zip;
40
41use std::io;
42use std::path::{Path, PathBuf};
43
/// The archive container types this module can tell apart.
///
/// A value of this type is what [`detect_format`] produces and what
/// [`extract_archive`] dispatches on. It is a plain, field-less tag, so it is
/// cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArchiveFormat {
    /// A `.zip` file.
    Zip,
    /// A `.rar` file.
    Rar,
    /// A `.7z` (7-Zip) file.
    SevenZip,
    /// A gzip-compressed tarball, named either `.tar.gz` or `.tgz`.
    TarGz,
}
56
57/// Detects archive format by file extension (case-insensitive).
58///
59/// For `.tar.gz`, the function checks whether the filename ends with
60/// `.tar.gz` (case-insensitive) before falling through to single-extension
61/// matching. Returns `None` for unrecognised extensions. No magic-byte
62/// sniffing is performed.
63pub fn detect_format(path: &Path) -> Option<ArchiveFormat> {
64    // Compound extension check: .tar.gz
65    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
66        let lower = name.to_ascii_lowercase();
67        if lower.ends_with(".tar.gz") {
68            return Some(ArchiveFormat::TarGz);
69        }
70    }
71
72    // Single extension check
73    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
74    match ext.as_str() {
75        "zip" => Some(ArchiveFormat::Zip),
76        "rar" => Some(ArchiveFormat::Rar),
77        "7z" => Some(ArchiveFormat::SevenZip),
78        "tgz" => Some(ArchiveFormat::TarGz),
79        _ => None,
80    }
81}
82
83/// Extracts an archive to the given destination directory.
84///
85/// Dispatches to the appropriate format-specific extractor based on
86/// [`detect_format`]. Returns the list of extracted file paths.
87///
88/// # Errors
89///
90/// Returns an error if the archive format is unrecognised or extraction
91/// fails (corrupted, password-protected, etc.).
92pub fn extract_archive(archive_path: &Path, dest_dir: &Path) -> io::Result<Vec<PathBuf>> {
93    let format = detect_format(archive_path).ok_or_else(|| {
94        io::Error::new(
95            io::ErrorKind::InvalidInput,
96            format!("Unrecognised archive format: {}", archive_path.display()),
97        )
98    })?;
99
100    match format {
101        ArchiveFormat::Zip => zip::extract_zip(archive_path, dest_dir),
102        ArchiveFormat::Rar => rar::extract_rar(archive_path, dest_dir),
103        ArchiveFormat::SevenZip => sevenz::extract_7z(archive_path, dest_dir),
104        ArchiveFormat::TarGz => targz::extract_tar_gz(archive_path, dest_dir),
105    }
106}
107
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs [`detect_format`] on a bare file name.
    fn detect(file_name: &str) -> Option<ArchiveFormat> {
        detect_format(Path::new(file_name))
    }

    /// Creates an empty file called `file_name` inside a fresh temporary
    /// directory, tries to extract it into that same directory, and returns
    /// the resulting error message. Panics if extraction succeeds.
    fn extraction_error_for(file_name: &str) -> String {
        let scratch = tempfile::tempdir().unwrap();
        let archive = scratch.path().join(file_name);
        std::fs::File::create(&archive).unwrap();

        extract_archive(&archive, scratch.path())
            .expect_err("extraction was expected to fail")
            .to_string()
    }

    // --- detect_format: recognised extensions, in assorted letter cases ---

    #[test]
    fn test_detect_format_zip() {
        assert_eq!(detect("test.zip"), Some(ArchiveFormat::Zip));
    }

    #[test]
    fn test_detect_format_zip_uppercase() {
        assert_eq!(detect("test.ZIP"), Some(ArchiveFormat::Zip));
    }

    #[test]
    fn test_detect_format_rar() {
        assert_eq!(detect("test.rar"), Some(ArchiveFormat::Rar));
    }

    #[test]
    fn test_detect_format_rar_mixed_case() {
        assert_eq!(detect("test.Rar"), Some(ArchiveFormat::Rar));
    }

    #[test]
    fn test_detect_format_7z() {
        assert_eq!(detect("test.7z"), Some(ArchiveFormat::SevenZip));
    }

    #[test]
    fn test_detect_format_7z_uppercase() {
        assert_eq!(detect("test.7Z"), Some(ArchiveFormat::SevenZip));
    }

    #[test]
    fn test_detect_format_tar_gz() {
        assert_eq!(detect("test.tar.gz"), Some(ArchiveFormat::TarGz));
    }

    #[test]
    fn test_detect_format_tar_gz_uppercase() {
        assert_eq!(detect("test.TAR.GZ"), Some(ArchiveFormat::TarGz));
    }

    #[test]
    fn test_detect_format_tgz() {
        assert_eq!(detect("test.tgz"), Some(ArchiveFormat::TarGz));
    }

    #[test]
    fn test_detect_format_tgz_uppercase() {
        assert_eq!(detect("test.TGZ"), Some(ArchiveFormat::TarGz));
    }

    // --- detect_format: names that must not be classified ---

    #[test]
    fn test_detect_format_tar_bz2_none() {
        assert_eq!(detect("test.tar.bz2"), None);
    }

    #[test]
    fn test_detect_format_plain_gz_none() {
        assert_eq!(detect("test.gz"), None);
    }

    #[test]
    fn test_detect_format_srt_none() {
        assert_eq!(detect("test.srt"), None);
    }

    #[test]
    fn test_detect_format_no_extension_none() {
        assert_eq!(detect("testfile"), None);
    }

    // --- extract_archive: error paths ---

    #[test]
    fn test_extract_archive_unknown_format() {
        let message = extraction_error_for("test.tar.bz2");
        assert!(message.contains("Unrecognised"));
    }

    // Only meaningful when RAR support is left out of the build.
    #[cfg(not(feature = "archive-rar"))]
    #[test]
    fn test_extract_rar_disabled_feature() {
        let message = extraction_error_for("test.rar");
        assert!(message.contains("not compiled in"));
    }
}
234}