Skip to main content

shadowforge_lib/adapters/
archive.rs

1//! Adapter implementing the [`ArchiveHandler`] port for ZIP, TAR, and TAR.GZ.
2
3use std::io::{Cursor, Read, Write};
4
5use bytes::Bytes;
6
7use crate::domain::archive::{MAX_NESTING_DEPTH, detect_format};
8use crate::domain::errors::ArchiveError;
9use crate::domain::ports::ArchiveHandler;
10use crate::domain::types::ArchiveFormat;
11
/// Per-entry ceiling on decompressed data: 256 MiB (`256 << 20` bytes).
///
/// Used as the read limit when unpacking so that a zip-bomb / tar-bomb
/// `DoS` cannot exhaust memory through a single oversized entry.
const MAX_ENTRY_SIZE: u64 = 256 << 20;
15
/// Concrete [`ArchiveHandler`] implementation using in-memory buffers.
///
/// This is a field-less unit struct, so it is trivially copyable and every
/// value is interchangeable; `Default` is derived rather than hand-written.
#[derive(Debug, Clone, Copy, Default)]
pub struct ArchiveHandlerImpl;

impl ArchiveHandlerImpl {
    /// Create a new archive handler.
    ///
    /// Equivalent to [`ArchiveHandlerImpl::default`], but usable in `const`
    /// contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
32
33impl ArchiveHandler for ArchiveHandlerImpl {
34    fn pack(&self, files: &[(&str, &[u8])], format: ArchiveFormat) -> Result<Bytes, ArchiveError> {
35        match format {
36            ArchiveFormat::Zip => pack_zip(files),
37            ArchiveFormat::Tar => pack_tar(files),
38            ArchiveFormat::TarGz => pack_tar_gz(files),
39        }
40    }
41
42    fn unpack(
43        &self,
44        archive: &[u8],
45        format: ArchiveFormat,
46    ) -> Result<Vec<(String, Bytes)>, ArchiveError> {
47        unpack_recursive(archive, format, 0)
48    }
49}
50
51fn pack_zip(files: &[(&str, &[u8])]) -> Result<Bytes, ArchiveError> {
52    let buf = Vec::new();
53    let cursor = Cursor::new(buf);
54    let mut writer = zip::ZipWriter::new(cursor);
55
56    for &(name, data) in files {
57        let options = zip::write::SimpleFileOptions::default()
58            .compression_method(zip::CompressionMethod::Deflated);
59        writer
60            .start_file(name, options)
61            .map_err(|e| ArchiveError::PackFailed {
62                reason: e.to_string(),
63            })?;
64        writer
65            .write_all(data)
66            .map_err(|e| ArchiveError::PackFailed {
67                reason: e.to_string(),
68            })?;
69    }
70
71    let cursor = writer.finish().map_err(|e| ArchiveError::PackFailed {
72        reason: e.to_string(),
73    })?;
74    Ok(Bytes::from(cursor.into_inner()))
75}
76
77fn pack_tar(files: &[(&str, &[u8])]) -> Result<Bytes, ArchiveError> {
78    let buf = Vec::new();
79    let mut builder = tar::Builder::new(buf);
80
81    for &(name, data) in files {
82        let mut header = tar::Header::new_gnu();
83        header.set_size(data.len() as u64);
84        header.set_mode(0o644);
85        header.set_cksum();
86        builder
87            .append_data(&mut header, name, data)
88            .map_err(|e| ArchiveError::PackFailed {
89                reason: e.to_string(),
90            })?;
91    }
92
93    let buf = builder.into_inner().map_err(|e| ArchiveError::PackFailed {
94        reason: e.to_string(),
95    })?;
96    Ok(Bytes::from(buf))
97}
98
99fn pack_tar_gz(files: &[(&str, &[u8])]) -> Result<Bytes, ArchiveError> {
100    let buf = Vec::new();
101    let encoder = flate2::write::GzEncoder::new(buf, flate2::Compression::default());
102    let mut builder = tar::Builder::new(encoder);
103
104    for &(name, data) in files {
105        let mut header = tar::Header::new_gnu();
106        header.set_size(data.len() as u64);
107        header.set_mode(0o644);
108        header.set_cksum();
109        builder
110            .append_data(&mut header, name, data)
111            .map_err(|e| ArchiveError::PackFailed {
112                reason: e.to_string(),
113            })?;
114    }
115
116    let encoder = builder.into_inner().map_err(|e| ArchiveError::PackFailed {
117        reason: e.to_string(),
118    })?;
119    let buf = encoder.finish().map_err(|e| ArchiveError::PackFailed {
120        reason: e.to_string(),
121    })?;
122    Ok(Bytes::from(buf))
123}
124
125fn unpack_recursive(
126    archive: &[u8],
127    format: ArchiveFormat,
128    depth: u8,
129) -> Result<Vec<(String, Bytes)>, ArchiveError> {
130    let entries = match format {
131        ArchiveFormat::Zip => unpack_zip(archive)?,
132        ArchiveFormat::Tar => unpack_tar(archive)?,
133        ArchiveFormat::TarGz => unpack_tar_gz(archive)?,
134    };
135
136    if depth >= MAX_NESTING_DEPTH {
137        return Ok(entries);
138    }
139
140    // Check for nested archives
141    let mut result = Vec::new();
142    for (name, data) in entries {
143        if let Some(nested_format) = detect_format(&data) {
144            match unpack_recursive(&data, nested_format, depth.strict_add(1)) {
145                Ok(nested_entries) => {
146                    for (nested_name, nested_data) in nested_entries {
147                        result.push((format!("{name}/{nested_name}"), nested_data));
148                    }
149                }
150                Err(_) => {
151                    // Not actually a valid archive, treat as regular file
152                    result.push((name, data));
153                }
154            }
155        } else {
156            result.push((name, data));
157        }
158    }
159
160    Ok(result)
161}
162
163fn unpack_zip(archive: &[u8]) -> Result<Vec<(String, Bytes)>, ArchiveError> {
164    let cursor = Cursor::new(archive);
165    let mut reader = zip::ZipArchive::new(cursor).map_err(|e| ArchiveError::UnpackFailed {
166        reason: e.to_string(),
167    })?;
168
169    let mut entries = Vec::new();
170    for i in 0..reader.len() {
171        let file = reader.by_index(i).map_err(|e| ArchiveError::UnpackFailed {
172            reason: e.to_string(),
173        })?;
174        if file.is_dir() {
175            continue;
176        }
177        let name = file.name().to_string();
178        let mut data = Vec::new();
179        file.take(MAX_ENTRY_SIZE)
180            .read_to_end(&mut data)
181            .map_err(|e| ArchiveError::UnpackFailed {
182                reason: e.to_string(),
183            })?;
184        entries.push((name, Bytes::from(data)));
185    }
186
187    Ok(entries)
188}
189
190fn unpack_tar(archive: &[u8]) -> Result<Vec<(String, Bytes)>, ArchiveError> {
191    let cursor = Cursor::new(archive);
192    let mut reader = tar::Archive::new(cursor);
193
194    let mut entries = Vec::new();
195    for entry_result in reader.entries().map_err(|e| ArchiveError::UnpackFailed {
196        reason: e.to_string(),
197    })? {
198        let mut entry = entry_result.map_err(|e| ArchiveError::UnpackFailed {
199            reason: e.to_string(),
200        })?;
201        let path = entry
202            .path()
203            .map_err(|e| ArchiveError::UnpackFailed {
204                reason: e.to_string(),
205            })?
206            .to_string_lossy()
207            .to_string();
208        let mut data = Vec::new();
209        entry
210            .by_ref()
211            .take(MAX_ENTRY_SIZE)
212            .read_to_end(&mut data)
213            .map_err(|e| ArchiveError::UnpackFailed {
214                reason: e.to_string(),
215            })?;
216        if !data.is_empty() {
217            entries.push((path, Bytes::from(data)));
218        }
219    }
220
221    Ok(entries)
222}
223
224fn unpack_tar_gz(archive: &[u8]) -> Result<Vec<(String, Bytes)>, ArchiveError> {
225    let cursor = Cursor::new(archive);
226    let decoder = flate2::read::GzDecoder::new(cursor);
227    let mut reader = tar::Archive::new(decoder);
228
229    let mut entries = Vec::new();
230    for entry_result in reader.entries().map_err(|e| ArchiveError::UnpackFailed {
231        reason: e.to_string(),
232    })? {
233        let mut entry = entry_result.map_err(|e| ArchiveError::UnpackFailed {
234            reason: e.to_string(),
235        })?;
236        let path = entry
237            .path()
238            .map_err(|e| ArchiveError::UnpackFailed {
239                reason: e.to_string(),
240            })?
241            .to_string_lossy()
242            .to_string();
243        let mut data = Vec::new();
244        entry
245            .by_ref()
246            .take(MAX_ENTRY_SIZE)
247            .read_to_end(&mut data)
248            .map_err(|e| ArchiveError::UnpackFailed {
249                reason: e.to_string(),
250            })?;
251        if !data.is_empty() {
252            entries.push((path, Bytes::from(data)));
253        }
254    }
255
256    Ok(entries)
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Lets each test propagate failures with `?` instead of unwrapping.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Borrow the unpacked entry at `index`, or report that it is missing.
    fn entry_at(
        entries: &[(String, Bytes)],
        index: usize,
    ) -> Result<&(String, Bytes), &'static str> {
        entries.get(index).ok_or("index out of bounds")
    }

    /// ZIP: names and contents of two files survive pack + unpack, in order.
    #[test]
    fn zip_round_trip() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let files = [
            ("hello.txt", b"Hello, world!" as &[u8]),
            ("data.bin", &[0xDE, 0xAD, 0xBE, 0xEF]),
        ];

        let packed = handler.pack(&files, ArchiveFormat::Zip)?;
        let unpacked = handler.unpack(&packed, ArchiveFormat::Zip)?;
        assert_eq!(unpacked.len(), 2);

        let hello = entry_at(&unpacked, 0)?;
        assert_eq!(hello.0, "hello.txt");
        assert_eq!(hello.1.as_ref(), b"Hello, world!");

        let binary = entry_at(&unpacked, 1)?;
        assert_eq!(binary.0, "data.bin");
        assert_eq!(binary.1.as_ref(), &[0xDE, 0xAD, 0xBE, 0xEF]);
        Ok(())
    }

    /// TAR: contents of two files survive pack + unpack, in order.
    #[test]
    fn tar_round_trip() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let files = [
            ("file_a.txt", b"AAA" as &[u8]),
            ("file_b.txt", b"BBB" as &[u8]),
        ];

        let packed = handler.pack(&files, ArchiveFormat::Tar)?;
        let unpacked = handler.unpack(&packed, ArchiveFormat::Tar)?;
        assert_eq!(unpacked.len(), 2);

        let file_a = entry_at(&unpacked, 0)?;
        assert_eq!(file_a.1.as_ref(), b"AAA");
        let file_b = entry_at(&unpacked, 1)?;
        assert_eq!(file_b.1.as_ref(), b"BBB");
        Ok(())
    }

    /// TAR.GZ: a single file survives pack + unpack.
    #[test]
    fn tar_gz_round_trip() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let files = [("compressed.txt", b"This is compressed" as &[u8])];

        let packed = handler.pack(&files, ArchiveFormat::TarGz)?;
        let unpacked = handler.unpack(&packed, ArchiveFormat::TarGz)?;
        assert_eq!(unpacked.len(), 1);

        let only = entry_at(&unpacked, 0)?;
        assert_eq!(only.1.as_ref(), b"This is compressed");
        Ok(())
    }

    /// A ZIP stored inside a TAR is expanded, and the inner file is reported
    /// under the outer entry's name.
    #[test]
    fn nested_zip_in_tar() -> TestResult {
        let handler = ArchiveHandlerImpl::new();

        // Inner layer: a ZIP holding one file.
        let inner_zip = handler.pack(
            &[("inner.txt", b"nested file content" as &[u8])],
            ArchiveFormat::Zip,
        )?;

        // Outer layer: a TAR whose only entry is that ZIP.
        let outer_files = [("nested.zip", inner_zip.as_ref())];
        let outer_tar = handler.pack(&outer_files, ArchiveFormat::Tar)?;

        let unpacked = handler.unpack(&outer_tar, ArchiveFormat::Tar)?;
        assert_eq!(unpacked.len(), 1);

        let nested = entry_at(&unpacked, 0)?;
        assert_eq!(nested.0, "nested.zip/inner.txt");
        assert_eq!(nested.1.as_ref(), b"nested file content");
        Ok(())
    }

    /// Archives produced by `pack` are recognised by `detect_format`.
    #[test]
    fn format_detection_from_packed() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let files = [("test.txt", b"x" as &[u8])];

        let zip = handler.pack(&files, ArchiveFormat::Zip)?;
        assert_eq!(detect_format(&zip), Some(ArchiveFormat::Zip));

        let tar_gz = handler.pack(&files, ArchiveFormat::TarGz)?;
        assert_eq!(detect_format(&tar_gz), Some(ArchiveFormat::TarGz));
        Ok(())
    }

    /// Bytes that are not a ZIP are rejected with an error.
    #[test]
    fn unpack_invalid_zip_returns_error() {
        let handler = ArchiveHandlerImpl::new();
        assert!(handler.unpack(b"not a zip", ArchiveFormat::Zip).is_err());
    }

    /// Bytes that are not a TAR.GZ are rejected with an error.
    #[test]
    fn unpack_invalid_tar_gz_returns_error() {
        let handler = ArchiveHandlerImpl::new();
        assert!(
            handler
                .unpack(b"not a tar.gz", ArchiveFormat::TarGz)
                .is_err()
        );
    }

    /// Packing no files yields an archive that unpacks to no entries, for
    /// every supported format.
    #[test]
    fn pack_empty_files_list() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let files: [(&str, &[u8]); 0] = [];

        // Formats come in pairs so each value is moved exactly once: one for
        // packing, one for unpacking.
        let format_pairs = [
            (ArchiveFormat::Zip, ArchiveFormat::Zip),
            (ArchiveFormat::Tar, ArchiveFormat::Tar),
            (ArchiveFormat::TarGz, ArchiveFormat::TarGz),
        ];
        for (pack_as, unpack_as) in format_pairs {
            let packed = handler.pack(&files, pack_as)?;
            let unpacked = handler.unpack(&packed, unpack_as)?;
            assert!(unpacked.is_empty());
        }

        Ok(())
    }

    /// An entry that starts with the ZIP magic bytes but is not a valid ZIP
    /// is returned as an ordinary file instead of failing the whole unpack.
    #[test]
    fn nested_archive_fallback_on_invalid_inner() -> TestResult {
        let handler = ArchiveHandlerImpl::new();

        // ZIP local-file-header magic followed by junk.
        let mut bogus_zip = b"PK\x03\x04".to_vec();
        bogus_zip.extend_from_slice(b"garbage that is not a valid zip");

        let files = [("fake.zip", bogus_zip.as_slice())];
        let packed = handler.pack(&files, ArchiveFormat::Tar)?;
        let unpacked = handler.unpack(&packed, ArchiveFormat::Tar)?;

        assert_eq!(unpacked.len(), 1);
        let kept = unpacked.first().ok_or("empty")?;
        assert_eq!(kept.0, "fake.zip");
        Ok(())
    }

    /// A handler written as the bare unit value works like one from `new()`.
    #[test]
    fn archive_handler_default() -> TestResult {
        let handler = ArchiveHandlerImpl;
        let files = [("t.txt", b"data" as &[u8])];
        let packed = handler.pack(&files, ArchiveFormat::Tar)?;
        assert!(!packed.is_empty());
        Ok(())
    }
}