shadowforge_lib/adapters/
archive.rs1use std::io::{Cursor, Read, Write};
4
5use bytes::Bytes;
6
7use crate::domain::archive::{MAX_NESTING_DEPTH, detect_format};
8use crate::domain::errors::ArchiveError;
9use crate::domain::ports::ArchiveHandler;
10use crate::domain::types::ArchiveFormat;
11
/// Size limit, in bytes (256 MiB), applied to the contents of a single archive
/// entry when it is read during unpacking.
const MAX_ENTRY_SIZE: u64 = 256 * 1024 * 1024;
15
/// Stateless archive handler.
///
/// The type is a unit struct: it carries no configuration, so copies are free
/// and every value is interchangeable with any other.
#[derive(Debug, Clone, Copy)]
pub struct ArchiveHandlerImpl;
18
19impl Default for ArchiveHandlerImpl {
20 fn default() -> Self {
21 Self
22 }
23}
24
25impl ArchiveHandlerImpl {
26 #[must_use]
28 pub const fn new() -> Self {
29 Self
30 }
31}
32
33impl ArchiveHandler for ArchiveHandlerImpl {
34 fn pack(&self, files: &[(&str, &[u8])], format: ArchiveFormat) -> Result<Bytes, ArchiveError> {
35 match format {
36 ArchiveFormat::Zip => pack_zip(files),
37 ArchiveFormat::Tar => pack_tar(files),
38 ArchiveFormat::TarGz => pack_tar_gz(files),
39 }
40 }
41
42 fn unpack(
43 &self,
44 archive: &[u8],
45 format: ArchiveFormat,
46 ) -> Result<Vec<(String, Bytes)>, ArchiveError> {
47 unpack_recursive(archive, format, 0)
48 }
49}
50
51fn pack_zip(files: &[(&str, &[u8])]) -> Result<Bytes, ArchiveError> {
52 let buf = Vec::new();
53 let cursor = Cursor::new(buf);
54 let mut writer = zip::ZipWriter::new(cursor);
55
56 for &(name, data) in files {
57 let options = zip::write::SimpleFileOptions::default()
58 .compression_method(zip::CompressionMethod::Deflated);
59 writer
60 .start_file(name, options)
61 .map_err(|e| ArchiveError::PackFailed {
62 reason: e.to_string(),
63 })?;
64 writer
65 .write_all(data)
66 .map_err(|e| ArchiveError::PackFailed {
67 reason: e.to_string(),
68 })?;
69 }
70
71 let cursor = writer.finish().map_err(|e| ArchiveError::PackFailed {
72 reason: e.to_string(),
73 })?;
74 Ok(Bytes::from(cursor.into_inner()))
75}
76
77fn pack_tar(files: &[(&str, &[u8])]) -> Result<Bytes, ArchiveError> {
78 let buf = Vec::new();
79 let mut builder = tar::Builder::new(buf);
80
81 for &(name, data) in files {
82 let mut header = tar::Header::new_gnu();
83 header.set_size(data.len() as u64);
84 header.set_mode(0o644);
85 header.set_cksum();
86 builder
87 .append_data(&mut header, name, data)
88 .map_err(|e| ArchiveError::PackFailed {
89 reason: e.to_string(),
90 })?;
91 }
92
93 let buf = builder.into_inner().map_err(|e| ArchiveError::PackFailed {
94 reason: e.to_string(),
95 })?;
96 Ok(Bytes::from(buf))
97}
98
99fn pack_tar_gz(files: &[(&str, &[u8])]) -> Result<Bytes, ArchiveError> {
100 let buf = Vec::new();
101 let encoder = flate2::write::GzEncoder::new(buf, flate2::Compression::default());
102 let mut builder = tar::Builder::new(encoder);
103
104 for &(name, data) in files {
105 let mut header = tar::Header::new_gnu();
106 header.set_size(data.len() as u64);
107 header.set_mode(0o644);
108 header.set_cksum();
109 builder
110 .append_data(&mut header, name, data)
111 .map_err(|e| ArchiveError::PackFailed {
112 reason: e.to_string(),
113 })?;
114 }
115
116 let encoder = builder.into_inner().map_err(|e| ArchiveError::PackFailed {
117 reason: e.to_string(),
118 })?;
119 let buf = encoder.finish().map_err(|e| ArchiveError::PackFailed {
120 reason: e.to_string(),
121 })?;
122 Ok(Bytes::from(buf))
123}
124
125fn unpack_recursive(
126 archive: &[u8],
127 format: ArchiveFormat,
128 depth: u8,
129) -> Result<Vec<(String, Bytes)>, ArchiveError> {
130 let entries = match format {
131 ArchiveFormat::Zip => unpack_zip(archive)?,
132 ArchiveFormat::Tar => unpack_tar(archive)?,
133 ArchiveFormat::TarGz => unpack_tar_gz(archive)?,
134 };
135
136 if depth >= MAX_NESTING_DEPTH {
137 return Ok(entries);
138 }
139
140 let mut result = Vec::new();
142 for (name, data) in entries {
143 if let Some(nested_format) = detect_format(&data) {
144 match unpack_recursive(&data, nested_format, depth.strict_add(1)) {
145 Ok(nested_entries) => {
146 for (nested_name, nested_data) in nested_entries {
147 result.push((format!("{name}/{nested_name}"), nested_data));
148 }
149 }
150 Err(_) => {
151 result.push((name, data));
153 }
154 }
155 } else {
156 result.push((name, data));
157 }
158 }
159
160 Ok(result)
161}
162
163fn unpack_zip(archive: &[u8]) -> Result<Vec<(String, Bytes)>, ArchiveError> {
164 let cursor = Cursor::new(archive);
165 let mut reader = zip::ZipArchive::new(cursor).map_err(|e| ArchiveError::UnpackFailed {
166 reason: e.to_string(),
167 })?;
168
169 let mut entries = Vec::new();
170 for i in 0..reader.len() {
171 let file = reader.by_index(i).map_err(|e| ArchiveError::UnpackFailed {
172 reason: e.to_string(),
173 })?;
174 if file.is_dir() {
175 continue;
176 }
177 let name = file.name().to_string();
178 let mut data = Vec::new();
179 file.take(MAX_ENTRY_SIZE)
180 .read_to_end(&mut data)
181 .map_err(|e| ArchiveError::UnpackFailed {
182 reason: e.to_string(),
183 })?;
184 entries.push((name, Bytes::from(data)));
185 }
186
187 Ok(entries)
188}
189
190fn unpack_tar(archive: &[u8]) -> Result<Vec<(String, Bytes)>, ArchiveError> {
191 let cursor = Cursor::new(archive);
192 let mut reader = tar::Archive::new(cursor);
193
194 let mut entries = Vec::new();
195 for entry_result in reader.entries().map_err(|e| ArchiveError::UnpackFailed {
196 reason: e.to_string(),
197 })? {
198 let mut entry = entry_result.map_err(|e| ArchiveError::UnpackFailed {
199 reason: e.to_string(),
200 })?;
201 let path = entry
202 .path()
203 .map_err(|e| ArchiveError::UnpackFailed {
204 reason: e.to_string(),
205 })?
206 .to_string_lossy()
207 .to_string();
208 let mut data = Vec::new();
209 entry
210 .by_ref()
211 .take(MAX_ENTRY_SIZE)
212 .read_to_end(&mut data)
213 .map_err(|e| ArchiveError::UnpackFailed {
214 reason: e.to_string(),
215 })?;
216 if !data.is_empty() {
217 entries.push((path, Bytes::from(data)));
218 }
219 }
220
221 Ok(entries)
222}
223
224fn unpack_tar_gz(archive: &[u8]) -> Result<Vec<(String, Bytes)>, ArchiveError> {
225 let cursor = Cursor::new(archive);
226 let decoder = flate2::read::GzDecoder::new(cursor);
227 let mut reader = tar::Archive::new(decoder);
228
229 let mut entries = Vec::new();
230 for entry_result in reader.entries().map_err(|e| ArchiveError::UnpackFailed {
231 reason: e.to_string(),
232 })? {
233 let mut entry = entry_result.map_err(|e| ArchiveError::UnpackFailed {
234 reason: e.to_string(),
235 })?;
236 let path = entry
237 .path()
238 .map_err(|e| ArchiveError::UnpackFailed {
239 reason: e.to_string(),
240 })?
241 .to_string_lossy()
242 .to_string();
243 let mut data = Vec::new();
244 entry
245 .by_ref()
246 .take(MAX_ENTRY_SIZE)
247 .read_to_end(&mut data)
248 .map_err(|e| ArchiveError::UnpackFailed {
249 reason: e.to_string(),
250 })?;
251 if !data.is_empty() {
252 entries.push((path, Bytes::from(data)));
253 }
254 }
255
256 Ok(entries)
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Lets tests propagate failures with `?` instead of unwrapping.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Packing two files into a zip and unpacking them yields the same names
    /// and contents, in the same order.
    #[test]
    fn zip_round_trip() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let input = vec![
            ("hello.txt", b"Hello, world!" as &[u8]),
            ("data.bin", &[0xDE, 0xAD, 0xBE, 0xEF]),
        ];
        let archive = handler.pack(&input, ArchiveFormat::Zip)?;
        let entries = handler.unpack(&archive, ArchiveFormat::Zip)?;

        assert_eq!(entries.len(), 2);

        let (first_name, first_data) = entries.first().ok_or("index out of bounds")?;
        assert_eq!(first_name.as_str(), "hello.txt");
        assert_eq!(first_data.as_ref(), b"Hello, world!");

        let (second_name, second_data) = entries.get(1).ok_or("index out of bounds")?;
        assert_eq!(second_name.as_str(), "data.bin");
        assert_eq!(second_data.as_ref(), &[0xDE, 0xAD, 0xBE, 0xEF]);
        Ok(())
    }

    /// Contents survive a tar pack/unpack cycle in order.
    #[test]
    fn tar_round_trip() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let input = vec![
            ("file_a.txt", b"AAA" as &[u8]),
            ("file_b.txt", b"BBB" as &[u8]),
        ];
        let archive = handler.pack(&input, ArchiveFormat::Tar)?;
        let entries = handler.unpack(&archive, ArchiveFormat::Tar)?;

        assert_eq!(entries.len(), 2);

        let (_, first_data) = entries.first().ok_or("index out of bounds")?;
        assert_eq!(first_data.as_ref(), b"AAA");

        let (_, second_data) = entries.get(1).ok_or("index out of bounds")?;
        assert_eq!(second_data.as_ref(), b"BBB");
        Ok(())
    }

    /// Contents survive a tar.gz pack/unpack cycle.
    #[test]
    fn tar_gz_round_trip() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let input = vec![("compressed.txt", b"This is compressed" as &[u8])];
        let archive = handler.pack(&input, ArchiveFormat::TarGz)?;
        let entries = handler.unpack(&archive, ArchiveFormat::TarGz)?;

        assert_eq!(entries.len(), 1);

        let (_, only_data) = entries.first().ok_or("index out of bounds")?;
        assert_eq!(only_data.as_ref(), b"This is compressed");
        Ok(())
    }

    /// A zip stored inside a tar is expanded, and the inner file is reported
    /// under the `outer/inner` name.
    #[test]
    fn nested_zip_in_tar() -> TestResult {
        let handler = ArchiveHandlerImpl::new();

        let inner_input = vec![("inner.txt", b"nested file content" as &[u8])];
        let inner_zip = handler.pack(&inner_input, ArchiveFormat::Zip)?;

        let outer_input = vec![("nested.zip", inner_zip.as_ref())];
        let outer_tar = handler.pack(&outer_input, ArchiveFormat::Tar)?;

        let entries = handler.unpack(&outer_tar, ArchiveFormat::Tar)?;

        assert_eq!(entries.len(), 1);

        let (only_name, only_data) = entries.first().ok_or("index out of bounds")?;
        assert_eq!(only_name.as_str(), "nested.zip/inner.txt");
        assert_eq!(only_data.as_ref(), b"nested file content");
        Ok(())
    }

    /// Archives produced by `pack` are recognised by `detect_format`.
    #[test]
    fn format_detection_from_packed() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let input = vec![("test.txt", b"x" as &[u8])];

        let zip_bytes = handler.pack(&input, ArchiveFormat::Zip)?;
        let tar_gz_bytes = handler.pack(&input, ArchiveFormat::TarGz)?;

        assert_eq!(detect_format(&zip_bytes), Some(ArchiveFormat::Zip));
        assert_eq!(detect_format(&tar_gz_bytes), Some(ArchiveFormat::TarGz));
        Ok(())
    }

    /// Bytes that are not a zip archive are rejected.
    #[test]
    fn unpack_invalid_zip_returns_error() {
        let handler = ArchiveHandlerImpl::new();
        let outcome = handler.unpack(b"not a zip", ArchiveFormat::Zip);
        assert!(outcome.is_err());
    }

    /// Bytes that are not a gzip stream are rejected.
    #[test]
    fn unpack_invalid_tar_gz_returns_error() {
        let handler = ArchiveHandlerImpl::new();
        let outcome = handler.unpack(b"not a tar.gz", ArchiveFormat::TarGz);
        assert!(outcome.is_err());
    }

    /// An empty file list round-trips to an empty entry list in every format.
    #[test]
    fn pack_empty_files_list() -> TestResult {
        let handler = ArchiveHandlerImpl::new();
        let no_files: Vec<(&str, &[u8])> = vec![];

        let zip_bytes = handler.pack(&no_files, ArchiveFormat::Zip)?;
        assert!(handler.unpack(&zip_bytes, ArchiveFormat::Zip)?.is_empty());

        let tar_bytes = handler.pack(&no_files, ArchiveFormat::Tar)?;
        assert!(handler.unpack(&tar_bytes, ArchiveFormat::Tar)?.is_empty());

        let tar_gz_bytes = handler.pack(&no_files, ArchiveFormat::TarGz)?;
        assert!(handler.unpack(&tar_gz_bytes, ArchiveFormat::TarGz)?.is_empty());

        Ok(())
    }

    /// An entry that starts with the zip magic bytes but is not a valid zip
    /// is kept as a plain entry instead of failing the whole unpack.
    #[test]
    fn nested_archive_fallback_on_invalid_inner() -> TestResult {
        let handler = ArchiveHandlerImpl::new();

        let mut bogus_zip = b"PK\x03\x04".to_vec();
        bogus_zip.extend_from_slice(b"garbage that is not a valid zip");

        let input = vec![("fake.zip", bogus_zip.as_slice())];
        let archive = handler.pack(&input, ArchiveFormat::Tar)?;
        let entries = handler.unpack(&archive, ArchiveFormat::Tar)?;

        assert_eq!(entries.len(), 1);

        let (only_name, _) = entries.first().ok_or("empty")?;
        assert_eq!(only_name.as_str(), "fake.zip");
        Ok(())
    }

    /// A handler built directly from the unit struct can pack data.
    #[test]
    fn archive_handler_default() -> TestResult {
        let handler = ArchiveHandlerImpl;
        let input = vec![("t.txt", b"data" as &[u8])];
        let archive = handler.pack(&input, ArchiveFormat::Tar)?;
        assert!(!archive.is_empty());
        Ok(())
    }
}