Skip to main content

modde_sources/decompress/
mod.rs

1//! Selective archive extraction: pull a chosen set of entries out of `zip`,
2//! `7z`, Bethesda (`BSA`/`BA2`), and (optionally) `rar` archives in one pass,
3//! validating sizes and rejecting unsafe entry paths.
4
5use std::collections::{BTreeMap, HashMap, HashSet};
6use std::fs::File;
7use std::io::{Cursor, Read as _, Seek, Write as _};
8use std::path::{Path, PathBuf};
9
10use anyhow::{Context, Result, bail};
11
/// Size in bytes (1 MiB) of the scratch buffer used when copying entry data.
const COPY_BUFFER: usize = 1 << 20;
13
/// How a single extracted entry should be delivered: written to disk or
/// returned as in-memory bytes.
#[derive(Debug, Clone)]
pub enum ArchiveRequestKind {
    /// Write the entry's contents to a file.
    WriteFile {
        /// Destination path of the output file; it is created (or truncated)
        /// when the entry is extracted.
        to: PathBuf,
        /// Exact size in bytes the extracted data must have. `None` disables
        /// the size check.
        expected_size: Option<u64>,
    },
    /// Return the entry's contents in [`ArchiveBatchOutput::bytes`].
    Bytes,
}
24
/// One requested entry: the archive path to extract, an optional nested inner
/// path, and how to deliver it. `directive_index` ties the result back to the
/// caller's request.
#[derive(Debug, Clone)]
pub struct ArchiveRequest {
    /// Caller-chosen identifier. It keys [`ArchiveBatchOutput::bytes`] and is
    /// used to track which requests have been satisfied.
    pub directive_index: usize,
    /// Path of the entry inside the archive. For zip, 7z and rar archives it is
    /// matched case-insensitively, with `\` treated as `/`.
    pub from: String,
    /// When set, the `from` entry is itself opened as an archive and this path
    /// is extracted from it instead of delivering the `from` entry directly.
    pub inner_path: Option<String>,
    /// How the extracted data is delivered.
    pub kind: ArchiveRequestKind,
}
35
/// Results of a batch extraction: in-memory bytes keyed by `directive_index`
/// for entries requested as [`ArchiveRequestKind::Bytes`].
#[derive(Debug, Default)]
pub struct ArchiveBatchOutput {
    /// Extracted contents, keyed by the `directive_index` of the request that
    /// asked for them. Requests delivered as files have no entry here.
    pub bytes: HashMap<usize, Vec<u8>>,
}
42
/// Source of an archive to extract from: a file path or in-memory bytes.
pub enum ArchiveInput<'a> {
    /// Archive stored on disk at the given path.
    Path(&'a Path),
    /// Archive held in memory. `name` labels the archive in error messages and
    /// `bytes` is the raw archive data. Only zip and 7z are read from memory.
    Bytes { name: &'a str, bytes: &'a [u8] },
}
48
49/// Stateless entry point for selective, single-pass archive extraction.
50pub struct ArchiveBatchExtractor;
51
52impl ArchiveBatchExtractor {
53    /// Extract the requested entries from the archive at `path`.
54    ///
55    /// # Errors
56    ///
57    /// Returns an error for unsupported formats, unsafe entry paths, size
58    /// mismatches, or any requested entry that is missing.
59    pub fn extract_selected(
60        path: &Path,
61        requests: &[ArchiveRequest],
62    ) -> Result<ArchiveBatchOutput> {
63        Self::extract_selected_from(ArchiveInput::Path(path), requests)
64    }
65
66    /// Extract the requested entries from `input` (a path or in-memory bytes).
67    ///
68    /// # Errors
69    ///
70    /// Returns an error for unsupported formats, unsafe entry paths, size
71    /// mismatches, or any requested entry that is missing.
72    pub fn extract_selected_from(
73        input: ArchiveInput<'_>,
74        requests: &[ArchiveRequest],
75    ) -> Result<ArchiveBatchOutput> {
76        for request in requests {
77            validate_archive_entry(&request.from)?;
78            if let Some(inner_path) = &request.inner_path {
79                validate_archive_entry(inner_path)?;
80            }
81            if let ArchiveRequestKind::WriteFile { to, .. } = &request.kind
82                && let Some(parent) = to.parent()
83            {
84                std::fs::create_dir_all(parent)?;
85            }
86        }
87
88        match input {
89            ArchiveInput::Path(path) => {
90                if has_zip_magic(path).unwrap_or(false) {
91                    return extract_zip(File::open(path)?, path.display().to_string(), requests);
92                }
93
94                if modde_core::bethesda_archive::ArchiveIndex::has_bethesda_magic(path)
95                    .unwrap_or(false)
96                {
97                    return extract_bethesda(path, requests);
98                }
99
100                #[cfg(feature = "rar")]
101                if has_rar_magic(path).unwrap_or(false) {
102                    return extract_rar(path, requests);
103                }
104
105                #[cfg(not(feature = "rar"))]
106                if has_rar_magic(path).unwrap_or(false) {
107                    bail!(
108                        "RAR archive detected but modde-sources was built without the rar feature"
109                    );
110                }
111
112                if sevenz_rust2::Archive::open(path).is_ok() {
113                    return extract_seven_z(
114                        File::open(path)?,
115                        path.display().to_string(),
116                        requests,
117                    );
118                }
119
120                bail!(
121                    "unsupported archive format for {}; supported by default: zip, 7z, BSA, BA2{}",
122                    path.display(),
123                    if cfg!(feature = "rar") { ", rar" } else { "" }
124                )
125            }
126            ArchiveInput::Bytes { name, bytes } => {
127                if bytes_have_zip_magic(bytes) {
128                    return extract_zip(Cursor::new(bytes), name.to_string(), requests);
129                }
130
131                if let Ok(output) = extract_seven_z(Cursor::new(bytes), name.to_string(), requests)
132                {
133                    return Ok(output);
134                }
135
136                bail!(
137                    "unsupported in-memory archive format for {name}; supported in memory: zip, 7z"
138                )
139            }
140        }
141    }
142}
143
144fn extract_zip<R: std::io::Read + Seek>(
145    reader: R,
146    label: String,
147    requests: &[ArchiveRequest],
148) -> Result<ArchiveBatchOutput> {
149    let by_path = requests_by_normalized_path(requests);
150    let mut archive = zip::ZipArchive::new(reader)
151        .with_context(|| format!("failed to read zip archive {label}"))?;
152    let mut output = ArchiveBatchOutput::default();
153    let mut found = HashSet::new();
154
155    for index in 0..archive.len() {
156        let mut entry = archive.by_index(index)?;
157        validate_zip_entry(&entry)?;
158        let key = normalize_path(entry.name()).to_lowercase();
159        let Some(matched_requests) = by_path.get(&key) else {
160            continue;
161        };
162
163        if matched_requests
164            .iter()
165            .all(|request| request.inner_path.is_none())
166        {
167            validate_declared_entry_size(entry.size(), matched_requests)?;
168        }
169
170        if matched_requests
171            .iter()
172            .any(|request| request.inner_path.is_some())
173        {
174            let mut data = Vec::new();
175            entry.read_to_end(&mut data)?;
176            satisfy_maybe_nested_requests_from_bytes(&data, matched_requests, &mut output)?;
177        } else if matched_requests
178            .iter()
179            .any(|request| matches!(request.kind, ArchiveRequestKind::Bytes))
180        {
181            let data = read_to_vec(&mut entry, expected_write_size(matched_requests)?)?;
182            satisfy_requests_from_bytes(&data, matched_requests, &mut output)?;
183        } else {
184            let mut writers = Vec::new();
185            for request in matched_requests {
186                if let ArchiveRequestKind::WriteFile { to, .. } = &request.kind {
187                    writers
188                        .push(File::create(to).with_context(|| {
189                            format!("failed to create output {}", to.display())
190                        })?);
191                }
192            }
193            copy_streaming_to_many(
194                &mut entry,
195                &mut writers,
196                expected_write_size(matched_requests)?,
197            )?;
198        }
199
200        found.extend(
201            matched_requests
202                .iter()
203                .map(|request| request.directive_index),
204        );
205        if found.len() == requests.len() {
206            break;
207        }
208    }
209
210    ensure_all_found(&label, requests, &found)?;
211    Ok(output)
212}
213
214fn extract_bethesda(path: &Path, requests: &[ArchiveRequest]) -> Result<ArchiveBatchOutput> {
215    let index = modde_core::bethesda_archive::ArchiveIndex::read(path)
216        .with_context(|| format!("failed to read Bethesda archive {}", path.display()))?;
217    let mut output = ArchiveBatchOutput::default();
218    for request in requests {
219        if request.inner_path.is_some() {
220            let data = index.extract_file(&request.from)?;
221            satisfy_maybe_nested_requests_from_bytes(
222                &data,
223                std::slice::from_ref(request),
224                &mut output,
225            )?;
226            continue;
227        }
228        match &request.kind {
229            ArchiveRequestKind::WriteFile { to, expected_size } => {
230                let mut out = File::create(to)
231                    .with_context(|| format!("failed to create output {}", to.display()))?;
232                let mut checked = SizeCheckedWriter::new(&mut out, *expected_size);
233                index.extract_file_to_writer(&request.from, &mut checked)?;
234                checked.finish()?;
235            }
236            ArchiveRequestKind::Bytes => {
237                let data = index.extract_file(&request.from)?;
238                output.bytes.insert(request.directive_index, data);
239            }
240        }
241    }
242    Ok(output)
243}
244
245fn extract_seven_z<R: std::io::Read + Seek>(
246    reader: R,
247    label: String,
248    requests: &[ArchiveRequest],
249) -> Result<ArchiveBatchOutput> {
250    let by_path = requests_by_normalized_path(requests);
251    let mut output = ArchiveBatchOutput::default();
252    let mut found = HashSet::new();
253    let mut reader = sevenz_rust2::ArchiveReader::new(reader, sevenz_rust2::Password::empty())
254        .with_context(|| format!("failed to open 7z archive {label}"))?;
255
256    reader.for_each_entries(|entry, input| {
257        let key = normalize_path(entry.name()).to_lowercase();
258        let Some(matched_requests) = by_path.get(&key) else {
259            std::io::copy(input, &mut std::io::sink())
260                .map_err(|e| sevenz_rust2::Error::Io(e, "drain skipped entry".into()))?;
261            return Ok(true);
262        };
263        if matched_requests
264            .iter()
265            .all(|request| request.inner_path.is_none())
266        {
267            validate_declared_entry_size(entry.size(), matched_requests)
268                .map_err(|e| sevenz_rust2::Error::Io(e, "validate matched entry size".into()))?;
269        }
270        satisfy_requests_from_reader(input, matched_requests, &mut output)
271            .map_err(|e| sevenz_rust2::Error::Io(e, "extract matched entry".into()))?;
272        found.extend(
273            matched_requests
274                .iter()
275                .map(|request| request.directive_index),
276        );
277        Ok(true)
278    })?;
279
280    ensure_all_found(&label, requests, &found)?;
281    Ok(output)
282}
283
/// Extract the requested entries from the RAR archive at `path`.
///
/// Headers are read in archive order. Entries that were not requested are
/// skipped, matched entries are read fully into memory and delivered, and the
/// scan stops once the number of satisfied directive indices equals the number
/// of requests.
#[cfg(feature = "rar")]
fn extract_rar(path: &Path, requests: &[ArchiveRequest]) -> Result<ArchiveBatchOutput> {
    let wanted = requests_by_normalized_path(requests);
    let mut output = ArchiveBatchOutput::default();
    let mut found = HashSet::new();
    let mut archive = unrar::Archive::new(path)
        .open_for_processing()
        .with_context(|| format!("failed to open RAR archive {}", path.display()))?;

    while let Some(header) = archive.read_header()? {
        let entry_key = normalize_path(&header.entry().filename.to_string_lossy()).to_lowercase();
        if let Some(matched) = wanted.get(&entry_key) {
            let (data, next) = header.read()?;
            satisfy_requests_from_bytes(&data, matched, &mut output)?;
            found.extend(matched.iter().map(|request| request.directive_index));
            archive = next;
            if found.len() == requests.len() {
                break;
            }
        } else {
            archive = header.skip()?;
        }
    }

    ensure_all_found(&path.display().to_string(), requests, &found)?;
    Ok(output)
}
316
317fn requests_by_normalized_path(
318    requests: &[ArchiveRequest],
319) -> HashMap<String, Vec<ArchiveRequest>> {
320    let mut by_path: HashMap<String, Vec<ArchiveRequest>> = HashMap::new();
321    for request in requests {
322        by_path
323            .entry(normalize_path(&request.from).to_lowercase())
324            .or_default()
325            .push(request.clone());
326    }
327    by_path
328}
329
330fn satisfy_requests_from_reader(
331    input: &mut dyn std::io::Read,
332    requests: &[ArchiveRequest],
333    output: &mut ArchiveBatchOutput,
334) -> std::io::Result<()> {
335    if requests.iter().any(|request| request.inner_path.is_some()) {
336        let mut data = Vec::new();
337        input.read_to_end(&mut data)?;
338        satisfy_maybe_nested_requests_from_bytes(&data, requests, output)?;
339        return Ok(());
340    }
341
342    if requests
343        .iter()
344        .any(|request| matches!(request.kind, ArchiveRequestKind::Bytes))
345    {
346        let data = read_to_vec(input, expected_write_size(requests)?)?;
347        satisfy_requests_from_bytes(&data, requests, output)?;
348        return Ok(());
349    }
350
351    let mut writers = Vec::new();
352    for request in requests {
353        if let ArchiveRequestKind::WriteFile { to, .. } = &request.kind {
354            writers.push(File::create(to)?);
355        }
356    }
357    copy_streaming_to_many(input, &mut writers, expected_write_size(requests)?)?;
358    Ok(())
359}
360
361fn satisfy_requests_from_bytes(
362    data: &[u8],
363    requests: &[ArchiveRequest],
364    output: &mut ArchiveBatchOutput,
365) -> std::io::Result<()> {
366    if requests.iter().any(|request| request.inner_path.is_some()) {
367        satisfy_maybe_nested_requests_from_bytes(data, requests, output)?;
368        return Ok(());
369    }
370
371    for request in requests {
372        match &request.kind {
373            ArchiveRequestKind::WriteFile { to, expected_size } => {
374                validate_final_output_size(data.len() as u64, *expected_size)?;
375                std::fs::write(to, data)?;
376            }
377            ArchiveRequestKind::Bytes => {
378                output.bytes.insert(request.directive_index, data.to_vec());
379            }
380        }
381    }
382    Ok(())
383}
384
385fn satisfy_maybe_nested_requests_from_bytes(
386    data: &[u8],
387    requests: &[ArchiveRequest],
388    output: &mut ArchiveBatchOutput,
389) -> std::io::Result<()> {
390    let mut direct_requests = Vec::new();
391    let mut nested_requests = Vec::new();
392
393    for request in requests {
394        if let Some(inner_path) = &request.inner_path {
395            nested_requests.push(ArchiveRequest {
396                directive_index: request.directive_index,
397                from: inner_path.clone(),
398                inner_path: None,
399                kind: request.kind.clone(),
400            });
401        } else {
402            direct_requests.push(request.clone());
403        }
404    }
405
406    if !direct_requests.is_empty() {
407        satisfy_requests_from_bytes(data, &direct_requests, output)?;
408    }
409
410    if nested_requests.is_empty() {
411        return Ok(());
412    }
413
414    let nested_output = if bytes_have_bethesda_magic(data) {
415        let mut temp = tempfile::NamedTempFile::new()?;
416        temp.write_all(data)?;
417        temp.flush()?;
418        ArchiveBatchExtractor::extract_selected(temp.path(), &nested_requests)
419    } else {
420        ArchiveBatchExtractor::extract_selected_from(
421            ArchiveInput::Bytes {
422                name: "nested archive",
423                bytes: data,
424            },
425            &nested_requests,
426        )
427    }
428    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{e:#}")))?;
429
430    output.bytes.extend(nested_output.bytes);
431    Ok(())
432}
433
434fn copy_streaming_to_many(
435    input: &mut dyn std::io::Read,
436    outputs: &mut [File],
437    expected_size: Option<u64>,
438) -> std::io::Result<u64> {
439    let mut buf = vec![0_u8; COPY_BUFFER];
440    let mut total = 0_u64;
441    loop {
442        let n = input.read(&mut buf)?;
443        if n == 0 {
444            break;
445        }
446        total += n as u64;
447        validate_output_size(total, expected_size)?;
448        for output in outputs.iter_mut() {
449            output.write_all(&buf[..n])?;
450        }
451    }
452    for output in outputs {
453        output.flush()?;
454    }
455    validate_final_output_size(total, expected_size)?;
456    Ok(total)
457}
458
459fn read_to_vec(
460    input: &mut dyn std::io::Read,
461    expected_size: Option<u64>,
462) -> std::io::Result<Vec<u8>> {
463    let mut buf = vec![0_u8; COPY_BUFFER];
464    let mut data = Vec::new();
465    let mut total = 0_u64;
466    loop {
467        let n = input.read(&mut buf)?;
468        if n == 0 {
469            break;
470        }
471        total += n as u64;
472        validate_output_size(total, expected_size)?;
473        data.extend_from_slice(&buf[..n]);
474    }
475    validate_final_output_size(total, expected_size)?;
476    Ok(data)
477}
478
479fn expected_write_size(requests: &[ArchiveRequest]) -> std::io::Result<Option<u64>> {
480    let mut expected = None;
481    for request in requests {
482        let ArchiveRequestKind::WriteFile {
483            expected_size: Some(size),
484            ..
485        } = request.kind
486        else {
487            continue;
488        };
489        if let Some(previous) = expected
490            && previous != size
491        {
492            return Err(std::io::Error::new(
493                std::io::ErrorKind::InvalidData,
494                format!(
495                    "duplicate archive entry has inconsistent expected sizes: {previous} and {size}"
496                ),
497            ));
498        }
499        expected = Some(size);
500    }
501    Ok(expected)
502}
503
504fn validate_declared_entry_size(actual: u64, requests: &[ArchiveRequest]) -> std::io::Result<()> {
505    if let Some(expected) = expected_write_size(requests)?
506        && actual != expected
507    {
508        return Err(std::io::Error::new(
509            std::io::ErrorKind::InvalidData,
510            format!("archive entry size mismatch: expected {expected}, got {actual}"),
511        ));
512    }
513    Ok(())
514}
515
/// Fail with `InvalidData` when `actual` exceeds `expected`. Any size passes
/// when `expected` is `None`.
fn validate_output_size(actual: u64, expected: Option<u64>) -> std::io::Result<()> {
    match expected {
        Some(expected) if actual > expected => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "archive entry output exceeds expected size: expected {expected}, got {actual}"
            ),
        )),
        _ => Ok(()),
    }
}
529
530fn validate_final_output_size(actual: u64, expected: Option<u64>) -> std::io::Result<()> {
531    validate_output_size(actual, expected)?;
532    if let Some(expected) = expected
533        && actual != expected
534    {
535        return Err(std::io::Error::new(
536            std::io::ErrorKind::InvalidData,
537            format!("archive entry output size mismatch: expected {expected}, got {actual}"),
538        ));
539    }
540    Ok(())
541}
542
543struct SizeCheckedWriter<W> {
544    inner: W,
545    expected_size: Option<u64>,
546    written: u64,
547}
548
549impl<W> SizeCheckedWriter<W> {
550    fn new(inner: W, expected_size: Option<u64>) -> Self {
551        Self {
552            inner,
553            expected_size,
554            written: 0,
555        }
556    }
557
558    fn finish(&self) -> std::io::Result<()> {
559        validate_final_output_size(self.written, self.expected_size)
560    }
561}
562
563impl<W: std::io::Write> std::io::Write for SizeCheckedWriter<W> {
564    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
565        let next = self.written.saturating_add(buf.len() as u64);
566        validate_output_size(next, self.expected_size)?;
567        let written = self.inner.write(buf)?;
568        self.written += written as u64;
569        Ok(written)
570    }
571
572    fn flush(&mut self) -> std::io::Result<()> {
573        self.inner.flush()
574    }
575}
576
577fn ensure_all_found(
578    label: &str,
579    requests: &[ArchiveRequest],
580    found: &HashSet<usize>,
581) -> Result<()> {
582    let missing: Vec<&ArchiveRequest> = requests
583        .iter()
584        .filter(|request| !found.contains(&request.directive_index))
585        .collect();
586    if !missing.is_empty() {
587        let mut unique = BTreeMap::<&str, usize>::new();
588        for request in &missing {
589            *unique.entry(request.from.as_str()).or_default() += 1;
590        }
591        let shown = unique
592            .into_iter()
593            .map(|(path, count)| {
594                if count == 1 {
595                    path.to_string()
596                } else {
597                    format!("{path} (x{count})")
598                }
599            })
600            .collect::<Vec<_>>();
601        bail!(
602            "{} requested entr{} missing from {} ({} unique): {}",
603            missing.len(),
604            if missing.len() == 1 { "y" } else { "ies" },
605            label,
606            shown.len(),
607            shown.join(", ")
608        );
609    }
610    Ok(())
611}
612
/// Report whether the file at `path` starts with a RAR signature (either the
/// 7-byte or the 8-byte variant).
///
/// The leading bytes are read with `take(..).read_to_end(..)` rather than a
/// single `read` call, because one `read` may legally return fewer bytes than
/// requested, which would make a valid signature go undetected. A file shorter
/// than the signature yields `false`.
///
/// # Errors
///
/// Returns any I/O error from opening or reading the file.
fn has_rar_magic(path: &Path) -> std::io::Result<bool> {
    const RAR_MAGIC_SHORT: &[u8] = b"Rar!\x1A\x07\x00";
    const RAR_MAGIC_LONG: &[u8] = b"Rar!\x1A\x07\x01\x00";

    let mut magic = Vec::with_capacity(RAR_MAGIC_LONG.len());
    File::open(path)?
        .take(RAR_MAGIC_LONG.len() as u64)
        .read_to_end(&mut magic)?;
    Ok(magic.starts_with(RAR_MAGIC_SHORT) || magic.starts_with(RAR_MAGIC_LONG))
}
620
621fn has_zip_magic(path: &Path) -> std::io::Result<bool> {
622    let mut file = File::open(path)?;
623    let mut magic = [0_u8; 4];
624    let len = file.read(&mut magic)?;
625    Ok(bytes_have_zip_magic(&magic[..len]))
626}
627
/// Report whether `bytes` begins with one of the three `PK` signatures this
/// module treats as a zip archive: `PK\x03\x04`, `PK\x05\x06` or `PK\x07\x08`.
fn bytes_have_zip_magic(bytes: &[u8]) -> bool {
    const SIGNATURES: [&[u8]; 3] = [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"];
    SIGNATURES
        .iter()
        .any(|&signature| bytes.starts_with(signature))
}
633
/// Report whether `bytes` begins with one of the two signatures this module
/// treats as a Bethesda archive: `BSA\0` or `BTDX`.
fn bytes_have_bethesda_magic(bytes: &[u8]) -> bool {
    const SIGNATURES: [&[u8]; 2] = [b"BSA\0", b"BTDX"];
    SIGNATURES
        .iter()
        .any(|&signature| bytes.starts_with(signature))
}
637
638fn validate_zip_entry<R: std::io::Read + ?Sized>(entry: &zip::read::ZipFile<'_, R>) -> Result<()> {
639    validate_archive_entry(entry.name())?;
640    if entry.is_symlink() {
641        bail!("archive entry is a symlink (rejected): {}", entry.name());
642    }
643    Ok(())
644}
645
646fn validate_archive_entry(name: &str) -> Result<()> {
647    let normalized = normalize_path(name);
648    if normalized.starts_with('/') {
649        bail!("archive entry contains absolute path: {name}");
650    }
651    if normalized.split('/').any(|component| component == "..") {
652        bail!("archive entry contains path traversal: {name}");
653    }
654    Ok(())
655}
656
/// Return `path` with every backslash replaced by a forward slash.
fn normalize_path(path: &str) -> String {
    path.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
660
661#[cfg(test)]
662mod tests {
663    use super::*;
664    use std::process::Command;
665
666    fn write_zip(path: &Path, entries: &[(&str, &[u8])]) {
667        let file = File::create(path).unwrap();
668        let mut zip = zip::ZipWriter::new(file);
669        let options = zip::write::SimpleFileOptions::default()
670            .compression_method(zip::CompressionMethod::Deflated);
671        for (name, data) in entries {
672            zip.start_file(*name, options).unwrap();
673            zip.write_all(data).unwrap();
674        }
675        zip.finish().unwrap();
676    }
677
678    fn zip_bytes(entries: &[(&str, &[u8])]) -> Vec<u8> {
679        let mut cursor = Cursor::new(Vec::new());
680        {
681            let mut zip = zip::ZipWriter::new(&mut cursor);
682            let options = zip::write::SimpleFileOptions::default()
683                .compression_method(zip::CompressionMethod::Deflated);
684            for (name, data) in entries {
685                zip.start_file(*name, options).unwrap();
686                zip.write_all(data).unwrap();
687            }
688            zip.finish().unwrap();
689        }
690        cursor.into_inner()
691    }
692
693    fn write_7z(path: &Path, entries: &[(&str, &[u8])]) {
694        let temp = tempfile::tempdir().unwrap();
695        for (name, data) in entries {
696            let file_path = temp.path().join(name.replace('\\', "/"));
697            std::fs::create_dir_all(file_path.parent().unwrap()).unwrap();
698            std::fs::write(file_path, data).unwrap();
699        }
700
701        let status = Command::new("7zz")
702            .arg("a")
703            .arg("-t7z")
704            .arg("-mx=1")
705            .arg(path)
706            .arg(".")
707            .current_dir(temp.path())
708            .stdout(std::process::Stdio::null())
709            .stderr(std::process::Stdio::null())
710            .status()
711            .unwrap();
712        assert!(status.success(), "7zz failed to create fixture archive");
713    }
714
715    #[test]
716    fn zip_batch_writes_files_and_returns_bytes() {
717        let temp = tempfile::tempdir().unwrap();
718        let archive = temp.path().join("fixture.zip");
719        write_zip(
720            &archive,
721            &[("Data/A.txt", b"alpha"), ("Data/B.txt", b"beta")],
722        );
723        let out = temp.path().join("out").join("a.txt");
724
725        let output = ArchiveBatchExtractor::extract_selected(
726            &archive,
727            &[
728                ArchiveRequest {
729                    directive_index: 7,
730                    from: "data/a.txt".to_string(),
731                    inner_path: None,
732                    kind: ArchiveRequestKind::WriteFile {
733                        to: out.clone(),
734                        expected_size: None,
735                    },
736                },
737                ArchiveRequest {
738                    directive_index: 9,
739                    from: "Data/B.txt".to_string(),
740                    inner_path: None,
741                    kind: ArchiveRequestKind::Bytes,
742                },
743            ],
744        )
745        .unwrap();
746
747        assert_eq!(std::fs::read(out).unwrap(), b"alpha");
748        assert_eq!(output.bytes.get(&9).unwrap(), b"beta");
749    }
750
751    #[test]
752    fn zip_write_rejects_entry_larger_than_expected_size() {
753        let temp = tempfile::tempdir().unwrap();
754        let archive = temp.path().join("fixture.zip");
755        write_zip(&archive, &[("Data/A.txt", b"alpha")]);
756        let out = temp.path().join("out").join("a.txt");
757
758        let err = ArchiveBatchExtractor::extract_selected(
759            &archive,
760            &[ArchiveRequest {
761                directive_index: 7,
762                from: "data/a.txt".to_string(),
763                inner_path: None,
764                kind: ArchiveRequestKind::WriteFile {
765                    to: out.clone(),
766                    expected_size: Some(3),
767                },
768            }],
769        )
770        .unwrap_err();
771
772        assert!(format!("{err:#}").contains("entry size mismatch"));
773    }
774
775    #[test]
776    fn zip_write_rejects_entry_smaller_than_expected_size() {
777        let temp = tempfile::tempdir().unwrap();
778        let archive = temp.path().join("fixture.zip");
779        write_zip(&archive, &[("Data/A.txt", b"alpha")]);
780        let out = temp.path().join("out").join("a.txt");
781
782        let err = ArchiveBatchExtractor::extract_selected(
783            &archive,
784            &[ArchiveRequest {
785                directive_index: 7,
786                from: "data/a.txt".to_string(),
787                inner_path: None,
788                kind: ArchiveRequestKind::WriteFile {
789                    to: out.clone(),
790                    expected_size: Some(6),
791                },
792            }],
793        )
794        .unwrap_err();
795
796        assert!(format!("{err:#}").contains("entry size mismatch"));
797    }
798
799    #[test]
800    fn traversal_request_is_rejected_before_writing() {
801        let temp = tempfile::tempdir().unwrap();
802        let archive = temp.path().join("fixture.zip");
803        write_zip(&archive, &[("safe.txt", b"ok")]);
804        let out = temp.path().join("out.txt");
805
806        let err = ArchiveBatchExtractor::extract_selected(
807            &archive,
808            &[ArchiveRequest {
809                directive_index: 1,
810                from: "../escape.txt".to_string(),
811                inner_path: None,
812                kind: ArchiveRequestKind::WriteFile {
813                    to: out.clone(),
814                    expected_size: Some(10),
815                },
816            }],
817        )
818        .unwrap_err();
819
820        assert!(format!("{err:#}").contains("path traversal"));
821        assert!(!out.exists());
822    }
823
824    #[test]
825    fn in_memory_zip_batch_writes_files_and_returns_bytes() {
826        let temp = tempfile::tempdir().unwrap();
827        let archive = zip_bytes(&[("Data/A.txt", b"alpha"), ("Data/B.txt", b"beta")]);
828        let out = temp.path().join("out").join("a.txt");
829
830        let output = ArchiveBatchExtractor::extract_selected_from(
831            ArchiveInput::Bytes {
832                name: "fixture.zip",
833                bytes: &archive,
834            },
835            &[
836                ArchiveRequest {
837                    directive_index: 7,
838                    from: "data/a.txt".to_string(),
839                    inner_path: None,
840                    kind: ArchiveRequestKind::WriteFile {
841                        to: out.clone(),
842                        expected_size: None,
843                    },
844                },
845                ArchiveRequest {
846                    directive_index: 9,
847                    from: "Data/B.txt".to_string(),
848                    inner_path: None,
849                    kind: ArchiveRequestKind::Bytes,
850                },
851            ],
852        )
853        .unwrap();
854
855        assert_eq!(std::fs::read(out).unwrap(), b"alpha");
856        assert_eq!(output.bytes.get(&9).unwrap(), b"beta");
857    }
858
859    #[test]
860    fn seven_z_duplicate_write_requests_share_one_entry() {
861        let temp = tempfile::tempdir().unwrap();
862        let archive = temp.path().join("fixture.7z");
863        write_7z(&archive, &[("Data/Dupe.txt", b"same bytes")]);
864        let out_a = temp.path().join("out-a.txt");
865        let out_b = temp.path().join("out-b.txt");
866
867        ArchiveBatchExtractor::extract_selected(
868            &archive,
869            &[
870                ArchiveRequest {
871                    directive_index: 1,
872                    from: "data/dupe.txt".to_string(),
873                    inner_path: None,
874                    kind: ArchiveRequestKind::WriteFile {
875                        to: out_a.clone(),
876                        expected_size: None,
877                    },
878                },
879                ArchiveRequest {
880                    directive_index: 2,
881                    from: "Data\\Dupe.txt".to_string(),
882                    inner_path: None,
883                    kind: ArchiveRequestKind::WriteFile {
884                        to: out_b.clone(),
885                        expected_size: None,
886                    },
887                },
888            ],
889        )
890        .unwrap();
891
892        assert_eq!(std::fs::read(out_a).unwrap(), b"same bytes");
893        assert_eq!(std::fs::read(out_b).unwrap(), b"same bytes");
894    }
895
/// A `Bytes` request and a `WriteFile` request that name the same 7z entry
/// (with different case and separator) must both receive the entry contents:
/// one via `output.bytes`, the other on disk.
#[test]
fn seven_z_duplicate_bytes_and_write_requests_share_one_entry() {
    let workdir = tempfile::tempdir().unwrap();
    let fixture = workdir.path().join("fixture.7z");
    write_7z(&fixture, &[("Data/Dupe.txt", b"shared bytes")]);
    let target = workdir.path().join("out.txt");

    // Delivered in memory, keyed by directive index 1.
    let in_memory = ArchiveRequest {
        directive_index: 1,
        from: "Data/Dupe.txt".to_string(),
        inner_path: None,
        kind: ArchiveRequestKind::Bytes,
    };
    // Delivered to disk; the path differs from the stored one in case and separator.
    let on_disk = ArchiveRequest {
        directive_index: 2,
        from: "data\\dupe.txt".to_string(),
        inner_path: None,
        kind: ArchiveRequestKind::WriteFile {
            to: target.clone(),
            expected_size: None,
        },
    };

    let extracted =
        ArchiveBatchExtractor::extract_selected(&fixture, &[in_memory, on_disk]).unwrap();

    assert_eq!(extracted.bytes.get(&1).unwrap(), b"shared bytes");
    assert_eq!(std::fs::read(&target).unwrap(), b"shared bytes");
}
928
/// When a `Bytes` request and a `WriteFile` request share one 7z entry, an
/// `expected_size` on the write request that disagrees with the entry
/// (`Some(3)` against the 12-byte `shared bytes`) must fail the extraction
/// and leave no output file behind.
#[test]
fn seven_z_duplicate_bytes_and_write_rejects_size_mismatch() {
    let temp = tempfile::tempdir().unwrap();
    let archive = temp.path().join("fixture.7z");
    write_7z(&archive, &[("Data/Dupe.txt", b"shared bytes")]);
    let out = temp.path().join("out.txt");

    let err = ArchiveBatchExtractor::extract_selected(
        &archive,
        &[
            ArchiveRequest {
                directive_index: 1,
                from: "Data/Dupe.txt".to_string(),
                inner_path: None,
                kind: ArchiveRequestKind::Bytes,
            },
            ArchiveRequest {
                directive_index: 2,
                from: "data\\dupe.txt".to_string(),
                inner_path: None,
                kind: ArchiveRequestKind::WriteFile {
                    to: out.clone(),
                    // Deliberately wrong: the stored entry is 12 bytes long.
                    expected_size: Some(3),
                },
            },
        ],
    )
    .unwrap_err();

    // Render the whole error chain so either wording can be matched, and
    // report it on failure so a mismatch is diagnosable from the test output.
    let msg = format!("{err:#}");
    assert!(
        msg.contains("exceeds expected size") || msg.contains("entry size mismatch"),
        "{msg}"
    );
    assert!(
        !out.exists(),
        "output file must not exist after a size mismatch: {}",
        out.display()
    );
}
962
963    #[tokio::test]
964    async fn zip_entry_can_satisfy_nested_bsa_request() {
965        let temp = tempfile::tempdir().unwrap();
966        let bsa_root = temp.path().join("bsa-root");
967        let source_path = bsa_root.join("meshes/actors/test.nif");
968        std::fs::create_dir_all(source_path.parent().unwrap()).unwrap();
969        std::fs::write(&source_path, b"nested nif").unwrap();
970
971        let bsa_path = temp.path().join("inner.bsa");
972        crate::wabbajack::bsa_repack::create_bsa(
973            &[modde_core::manifest::wabbajack::BSAFileState {
974                path: "meshes/actors/test.nif".to_string(),
975                hash: 0,
976                size: 10,
977            }],
978            &bsa_root,
979            &bsa_path,
980        )
981        .await
982        .unwrap();
983
984        let archive = temp.path().join("outer.zip");
985        let bsa_bytes = std::fs::read(&bsa_path).unwrap();
986        write_zip(&archive, &[("Inner.bsa", &bsa_bytes)]);
987        let out = temp.path().join("out.nif");
988
989        ArchiveBatchExtractor::extract_selected(
990            &archive,
991            &[ArchiveRequest {
992                directive_index: 42,
993                from: "inner.bsa".to_string(),
994                inner_path: Some("meshes\\actors\\test.nif".to_string()),
995                kind: ArchiveRequestKind::WriteFile {
996                    to: out.clone(),
997                    expected_size: None,
998                },
999            }],
1000        )
1001        .unwrap();
1002
1003        assert_eq!(std::fs::read(out).unwrap(), b"nested nif");
1004    }
1005
/// Requesting the same absent entry twice must produce an error that counts
/// both requests (`2 requested entries missing`), reports one unique path
/// (`1 unique`), and lists that path once with its multiplicity (`(x2)`).
#[test]
fn missing_entry_error_deduplicates_repeated_paths() {
    let temp = tempfile::tempdir().unwrap();
    let archive = temp.path().join("fixture.7z");
    // The fixture only contains an unrelated entry, so both requests miss.
    write_7z(&archive, &[("Data/Present.txt", b"present")]);

    let err = ArchiveBatchExtractor::extract_selected(
        &archive,
        &[
            ArchiveRequest {
                directive_index: 1,
                from: "Data/Missing.txt".to_string(),
                inner_path: None,
                kind: ArchiveRequestKind::Bytes,
            },
            ArchiveRequest {
                directive_index: 2,
                from: "Data/Missing.txt".to_string(),
                inner_path: None,
                kind: ArchiveRequestKind::Bytes,
            },
        ],
    )
    .unwrap_err();
    let msg = format!("{err:#}");

    // Each assertion echoes the rendered error so a wording change is visible
    // directly in the failure output.
    assert!(msg.contains("2 requested entries missing"), "{msg}");
    assert!(msg.contains("1 unique"), "{msg}");
    assert!(msg.contains("Data/Missing.txt (x2)"), "{msg}");
}
1036
/// Without the `rar` feature, a file that starts with the RAR signature must
/// be rejected with an explicit "built without the rar feature" error, even
/// though its extension (`.archive`) does not identify it as RAR.
#[test]
#[cfg(not(feature = "rar"))]
fn rar_magic_without_rar_feature_reports_explicit_error() {
    let temp = tempfile::tempdir().unwrap();
    let archive = temp.path().join("fixture.archive");
    // RAR signature bytes followed by filler; not a complete archive.
    std::fs::write(&archive, b"Rar!\x1A\x07\x01\x00not a complete rar").unwrap();

    let err = ArchiveBatchExtractor::extract_selected(
        &archive,
        &[ArchiveRequest {
            directive_index: 1,
            from: "file.txt".to_string(),
            inner_path: None,
            kind: ArchiveRequestKind::Bytes,
        }],
    )
    .unwrap_err();
    // Bind the rendered chain so it can be shown when the assertion fails.
    let msg = format!("{err:#}");

    assert!(
        msg.contains("RAR archive detected but modde-sources was built without the rar feature"),
        "{msg}"
    );
}
1061
/// With the `rar` feature enabled, a file that starts with the RAR signature
/// but has a `.archive` extension must not be reported as an unsupported
/// format; the failure must be one of the two RAR-path messages asserted
/// below.
#[test]
#[cfg(feature = "rar")]
fn rar_magic_with_archive_extension_routes_to_rar_reader() {
    let workdir = tempfile::tempdir().unwrap();
    let fixture = workdir.path().join("fixture.archive");
    // RAR signature bytes followed by filler; not a complete archive.
    std::fs::write(&fixture, b"Rar!\x1A\x07\x01\x00not a complete rar").unwrap();

    let request = ArchiveRequest {
        directive_index: 1,
        from: "file.txt".to_string(),
        inner_path: None,
        kind: ArchiveRequestKind::Bytes,
    };
    let failure = ArchiveBatchExtractor::extract_selected(&fixture, &[request]).unwrap_err();
    let msg = format!("{failure:#}");

    // The generic "unsupported" error would mean format detection gave up.
    let rejected_as_unsupported = msg.contains("unsupported archive format");
    assert!(!rejected_as_unsupported, "{msg}");

    // Either failure wording is accepted.
    let reached_rar_reader =
        msg.contains("failed to open RAR archive") || msg.contains("requested entry missing");
    assert!(reached_rar_reader, "{msg}");
}
1087}