Skip to main content

fstool/fs/archive/
sevenz.rs

1//! 7-Zip (`.7z`) reader.
2//!
3//! Recognised by `detect_fs` via the `37 7A BC AF 27 1C` signature.
4//!
5//! With the `sevenz` Cargo feature this is a real **read-only** reader for the
6//! common single-coder case. It parses the 7z container — the 32-byte
7//! signature header, the (optionally LZMA-packed `kEncodedHeader`) end header,
8//! `StreamsInfo` (pack info, folders/coders, substreams) and `FilesInfo`
9//! (UTF-16 names, empty-stream / empty-file vectors) — and maps every file to
10//! its folder substream.
11//!
12//! Decoding is wired for folders whose single coder is **Copy**, **LZMA**,
13//! **BZip2** or **Deflate** (solid folders are sliced per substream, decoded
14//! once on demand). Folders using **LZMA2**, BCJ/Delta branch filters, PPMd,
15//! AES (encryption) or any multi-coder pipeline list correctly but read as a
16//! clean `Unsupported`, pending raw-LZMA2 + branch-filter codecs in `compcol`.
17//!
18//! Without the `sevenz` feature this stays a detection-only scaffold.
19
20use std::path::Path;
21
22use crate::Result;
23use crate::block::BlockDevice;
24use crate::fs::archive::ArchiveFs;
25use crate::fs::{DirEntry, FileAttrs, FileReadHandle, Filesystem, MutationCapability};
26
/// 7z filesystem handle.
///
/// Wraps the generic [`ArchiveFs`]; with the `sevenz` Cargo feature it also
/// carries the parsed container tables used to serve file reads.
pub struct SevenZFs {
    /// Generic archive filesystem that the `Filesystem` methods delegate to.
    fs: ArchiveFs,
    /// Parsed folder/file tables, consulted first by `read_file` and
    /// `open_file_ro`. Only present when the `sevenz` feature is enabled.
    #[cfg(feature = "sevenz")]
    inner: imp::Inner,
}
33
34impl SevenZFs {
35    #[cfg(feature = "sevenz")]
36    pub fn open(dev: &mut dyn BlockDevice) -> Result<Self> {
37        let p = imp::scan(dev)?;
38        Ok(Self {
39            fs: ArchiveFs::from_index(p.index),
40            inner: p.inner,
41        })
42    }
43
44    #[cfg(not(feature = "sevenz"))]
45    pub fn open(_dev: &mut dyn BlockDevice) -> Result<Self> {
46        Ok(Self {
47            fs: ArchiveFs::scaffold("7z"),
48        })
49    }
50
51    pub fn format(_dev: &mut dyn BlockDevice, _opts: &()) -> Result<Self> {
52        Err(crate::Error::Unsupported(
53            "7z: creating archives is not supported".into(),
54        ))
55    }
56}
57
58impl crate::fs::FilesystemFactory for SevenZFs {
59    type FormatOpts = ();
60    fn format(dev: &mut dyn BlockDevice, opts: &Self::FormatOpts) -> Result<Self> {
61        Self::format(dev, opts)
62    }
63    fn open(dev: &mut dyn BlockDevice) -> Result<Self> {
64        Self::open(dev)
65    }
66}
67
/// `Filesystem` implementation for 7z archives.
///
/// Every method forwards to the wrapped `ArchiveFs`. The only exceptions are
/// `read_file` and `open_file_ro`, which (with the `sevenz` feature) first
/// resolve the path through the parsed 7z tables in `inner`.
impl Filesystem for SevenZFs {
    /// Forwards to the wrapped `ArchiveFs`.
    fn create_file(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &Path,
        src: crate::fs::FileSource,
        meta: crate::fs::FileMeta,
    ) -> Result<()> {
        self.fs.create_file(dev, path, src, meta)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn create_dir(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &Path,
        meta: crate::fs::FileMeta,
    ) -> Result<()> {
        self.fs.create_dir(dev, path, meta)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn create_symlink(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &Path,
        target: &Path,
        meta: crate::fs::FileMeta,
    ) -> Result<()> {
        self.fs.create_symlink(dev, path, target, meta)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn create_device(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &Path,
        kind: crate::fs::DeviceKind,
        major: u32,
        minor: u32,
        meta: crate::fs::FileMeta,
    ) -> Result<()> {
        self.fs.create_device(dev, path, kind, major, minor, meta)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn remove(&mut self, dev: &mut dyn BlockDevice, path: &Path) -> Result<()> {
        self.fs.remove(dev, path)
    }

    /// Lists a directory using the wrapped `ArchiveFs`.
    fn list(&mut self, dev: &mut dyn BlockDevice, path: &Path) -> Result<Vec<DirEntry>> {
        self.fs.list(dev, path)
    }

    /// Opens `path` for streaming reads.
    ///
    /// With the `sevenz` feature, a path known to `inner` as a regular file
    /// is served by `Inner::open_file`, and a path flagged as unsupported
    /// returns `Error::Unsupported` with the recorded reason. Any other path
    /// (and every path without the feature) falls through to `ArchiveFs`.
    fn read_file<'a>(
        &'a mut self,
        dev: &'a mut dyn BlockDevice,
        path: &Path,
    ) -> Result<Box<dyn std::io::Read + 'a>> {
        #[cfg(feature = "sevenz")]
        match self.inner.lookup(path)? {
            imp::Lookup::File(loc) => return self.inner.open_file(dev, &loc),
            imp::Lookup::Unsupported(reason) => return Err(crate::Error::Unsupported(reason)),
            // Not a file tracked by the 7z tables: let `ArchiveFs` answer.
            imp::Lookup::NotRegular => {}
        }
        self.fs.read_file(dev, path)
    }

    /// Opens `path` as a read handle.
    ///
    /// With the `sevenz` feature a regular file is read to the end into a
    /// `Vec<u8>` and handed to `imp::mem_handle`, so the whole file is held in
    /// memory. Unsupported entries return `Error::Unsupported`; everything
    /// else falls through to `ArchiveFs`.
    fn open_file_ro<'a>(
        &'a mut self,
        dev: &'a mut dyn BlockDevice,
        path: &Path,
    ) -> Result<Box<dyn FileReadHandle + 'a>> {
        #[cfg(feature = "sevenz")]
        match self.inner.lookup(path)? {
            imp::Lookup::File(loc) => {
                use std::io::Read;
                let mut r = self.inner.open_file(dev, &loc)?;
                let mut bytes = Vec::new();
                r.read_to_end(&mut bytes).map_err(crate::Error::from)?;
                return Ok(Box::new(imp::mem_handle(bytes)));
            }
            imp::Lookup::Unsupported(reason) => return Err(crate::Error::Unsupported(reason)),
            // Not a file tracked by the 7z tables: let `ArchiveFs` answer.
            imp::Lookup::NotRegular => {}
        }
        self.fs.open_file_ro(dev, path)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn flush(&mut self, dev: &mut dyn BlockDevice) -> Result<()> {
        self.fs.flush(dev)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn streams_immediately(&self) -> bool {
        self.fs.streams_immediately()
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn image_len(&self) -> Option<u64> {
        self.fs.image_len()
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn mutation_capability(&self) -> MutationCapability {
        self.fs.mutation_capability()
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn read_symlink(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &Path,
    ) -> Result<std::path::PathBuf> {
        self.fs.read_symlink(dev, path)
    }

    /// Forwards to the wrapped `ArchiveFs`.
    fn getattr(&mut self, dev: &mut dyn BlockDevice, path: &Path) -> Result<FileAttrs> {
        self.fs.getattr(dev, path)
    }
}
180
181#[cfg(feature = "sevenz")]
182mod imp {
183    //! 7z container parser + single-coder folder decode.
184
185    use std::collections::HashMap;
186    use std::io::{self, Read};
187    use std::path::Path;
188
189    use compcol::Algorithm;
190
191    use crate::block::BlockDevice;
192    use crate::fs::archive::reader::BoundedDevReader;
193    use crate::fs::archive::{ArchiveEntry, ArchiveIndex, EntryKind};
194    use crate::{Error, Result};
195
    /// Upper bound on the decoded size of an LZMA-packed `kEncodedHeader`
    /// (64 MiB). A real archive's header is tiny; this only exists to stop a
    /// malformed archive from decompression-bombing us at open time.
    const MAX_DECODED_HEADER: u64 = 64 << 20;

    // Property IDs: one-byte tags that introduce each section of the header.
    const K_HEADER: u8 = 0x01; // first byte of the plain (decoded) header
    const K_MAIN_STREAMS_INFO: u8 = 0x04; // introduces StreamsInfo inside the header
    const K_FILES_INFO: u8 = 0x05; // introduces FilesInfo inside the header
    const K_PACK_INFO: u8 = 0x06; // StreamsInfo: pack position + pack sizes
    const K_UNPACK_INFO: u8 = 0x07; // StreamsInfo: folders and their unpack sizes
    const K_SUBSTREAMS_INFO: u8 = 0x08; // StreamsInfo: per-folder substream layout
    const K_SIZE: u8 = 0x09; // a list of sizes (pack sizes / substream sizes)
    const K_CRC: u8 = 0x0A; // a list of CRC digests (parsed only to be skipped)
    const K_FOLDER: u8 = 0x0B; // UnpackInfo: folder definitions follow
    const K_CODERS_UNPACK_SIZE: u8 = 0x0C; // UnpackInfo: one size per coder output
    const K_NUM_UNPACK_STREAM: u8 = 0x0D; // SubStreamsInfo: substream count per folder
    const K_EMPTY_STREAM: u8 = 0x0E; // FilesInfo: bit vector of entries without a stream
    const K_EMPTY_FILE: u8 = 0x0F; // FilesInfo: bit vector over the empty-stream entries
    const K_NAME: u8 = 0x11; // FilesInfo: entry names
    const K_ENCODED_HEADER: u8 = 0x17; // first byte of a packed end header
217
    /// One coder inside a folder.
    struct Coder {
        /// Method ID bytes; their length is the low nibble of the coder flag byte.
        id: Vec<u8>,
        /// Number of input streams (1 unless the flag byte has bit `0x10` set).
        n_in: u64,
        /// Number of output streams (1 unless the flag byte has bit `0x10` set).
        n_out: u64,
        /// Coder property bytes; empty unless the flag byte has bit `0x20` set.
        attr: Vec<u8>,
    }
225
    /// A folder = a coder pipeline producing one logical output stream.
    struct Folder {
        /// The coders, in the order they appear in the header.
        coders: Vec<Coder>,
        /// Number of bind pairs; `parse_folder` sets this to `total_out - 1`.
        num_bind_pairs: u64,
        unpack_sizes: Vec<u64>, // one per coder output stream, in order
        /// Number of files packed into this folder (defaults to 1).
        num_substreams: u64,
        substream_sizes: Vec<u64>, // length == num_substreams
    }
234
235    impl Folder {
236        fn total_in(&self) -> u64 {
237            self.coders.iter().map(|c| c.n_in).sum()
238        }
239        fn total_out(&self) -> u64 {
240            self.coders.iter().map(|c| c.n_out).sum()
241        }
242        fn num_packed_streams(&self) -> u64 {
243            self.total_in() - self.num_bind_pairs
244        }
245        /// The folder's overall uncompressed size = its last coder's output.
246        fn unpack_size(&self) -> u64 {
247            *self.unpack_sizes.last().unwrap_or(&0)
248        }
249    }
250
    /// Precomputed decode info for a folder (single-coder only).
    struct FolderRun {
        /// Absolute device offset of the folder's first packed stream.
        pack_offset: u64,
        /// Total size in bytes of the folder's packed streams.
        pack_size: u64,
        /// Size of the folder's decoded output.
        unpack_size: u64,
        /// Method ID of the folder's first coder (empty if it has none).
        coder_id: Vec<u8>,
        /// Property bytes of the folder's first coder (empty if it has none).
        coder_attr: Vec<u8>,
        /// Cumulative substream offsets within the decoded folder output
        /// (length == num_substreams + 1).
        sub_offsets: Vec<u64>,
        // NOTE: despite the name, `None` means the folder IS decodable.
        decodable: Option<String>, // Some(reason) if the folder can't be decoded
    }
263
    /// A file's location: folder + byte slice within the folder output.
    #[derive(Clone)]
    pub struct FileLoc {
        /// Index into `Inner::folders`.
        pub folder: usize,
        /// Byte offset of the file within the folder's decoded output.
        pub off: u64,
        /// Length of the file in bytes.
        pub len: u64,
    }
271
    /// What `Inner` stores for a path in its `files` map.
    pub enum Entry {
        /// A file whose bytes can be located inside a folder.
        File(FileLoc),
        /// A file that cannot be read; carries the reason that is reported
        /// as `Error::Unsupported`.
        Unsupported(String),
    }
276
    /// Result of `Inner::lookup`.
    pub enum Lookup {
        /// The path maps to a readable file at this location.
        File(FileLoc),
        /// The path maps to a file that cannot be read, with the reason.
        Unsupported(String),
        /// The path is not in the `files` map; callers fall back to the
        /// generic `ArchiveFs`.
        NotRegular,
    }
282
    /// Parsed container state kept after `scan` for serving reads.
    pub struct Inner {
        /// One decode descriptor per folder, indexed by `FileLoc::folder`.
        folders: Vec<FolderRun>,
        /// Lookup table keyed by normalised path (see `Inner::lookup`).
        files: HashMap<String, Entry>,
    }
287
    /// Everything `scan` produces for `SevenZFs::open`.
    pub struct Parsed {
        /// Entry index handed to `ArchiveFs::from_index`.
        pub index: ArchiveIndex,
        /// Folder/file tables used to read file contents.
        pub inner: Inner,
    }
292
293    // ─── byte-cursor with the 7z number / bitvector primitives ──────────────
294
295    struct Cur<'a> {
296        b: &'a [u8],
297        p: usize,
298    }
299    impl<'a> Cur<'a> {
300        fn new(b: &'a [u8]) -> Self {
301            Cur { b, p: 0 }
302        }
303        fn byte(&mut self) -> Result<u8> {
304            let v = *self
305                .b
306                .get(self.p)
307                .ok_or_else(|| Error::InvalidImage("7z: truncated header".into()))?;
308            self.p += 1;
309            Ok(v)
310        }
311        fn bytes(&mut self, n: usize) -> Result<&'a [u8]> {
312            let s = self
313                .b
314                .get(self.p..self.p + n)
315                .ok_or_else(|| Error::InvalidImage("7z: truncated header field".into()))?;
316            self.p += n;
317            Ok(s)
318        }
319        /// 7z variable-length number (REAL_UINT64).
320        fn num(&mut self) -> Result<u64> {
321            let first = self.byte()?;
322            let mut mask = 0x80u8;
323            let mut val = 0u64;
324            for i in 0..8 {
325                if first & mask == 0 {
326                    val |= ((first & (mask.wrapping_sub(1))) as u64) << (8 * i);
327                    return Ok(val);
328                }
329                val |= (self.byte()? as u64) << (8 * i);
330                mask >>= 1;
331            }
332            Ok(val)
333        }
334        fn usize_num(&mut self) -> Result<usize> {
335            let n = self.num()?;
336            usize::try_from(n).map_err(|_| Error::InvalidImage("7z: number too large".into()))
337        }
338        /// Bytes left in the header buffer after the cursor.
339        fn remaining(&self) -> usize {
340            self.b.len().saturating_sub(self.p)
341        }
342        /// Read a count and reject it if it could not possibly be backed by
343        /// the header bytes that remain. Each counted element consumes at
344        /// least one further header byte (a `num()` is ≥1 byte, a coder is
345        /// ≥1 byte, …), so a count exceeding the remaining length is an
346        /// attacker-inflated value and would only drive a huge allocation.
347        fn bounded_count(&mut self) -> Result<usize> {
348            let n = self.usize_num()?;
349            if n > self.remaining() {
350                return Err(Error::InvalidImage("7z: count exceeds header size".into()));
351            }
352            Ok(n)
353        }
354        /// Read a bit vector of `n` bits (MSB first within each byte).
355        fn bits(&mut self, n: usize) -> Result<Vec<bool>> {
356            // A bitvector of n bits is backed by ceil(n/8) header bytes;
357            // reject an inflated count before allocating n bools.
358            if n.div_ceil(8) > self.remaining() {
359                return Err(Error::InvalidImage(
360                    "7z: bitvector exceeds header size".into(),
361                ));
362            }
363            let mut out = Vec::with_capacity(n);
364            let mut cur = 0u8;
365            let mut mask = 0u8;
366            for _ in 0..n {
367                if mask == 0 {
368                    cur = self.byte()?;
369                    mask = 0x80;
370                }
371                out.push(cur & mask != 0);
372                mask >>= 1;
373            }
374            Ok(out)
375        }
376        /// A bit vector that may be prefixed by an "all defined" byte.
377        fn bits_all_defined(&mut self, n: usize) -> Result<Vec<bool>> {
378            if self.byte()? != 0 {
379                Ok(vec![true; n])
380            } else {
381                self.bits(n)
382            }
383        }
384    }
385
386    fn read_at(dev: &mut dyn BlockDevice, off: u64, len: usize) -> Result<Vec<u8>> {
387        let mut buf = vec![0u8; len];
388        dev.read_at(off, &mut buf)?;
389        Ok(buf)
390    }
391
392    // ─── StreamsInfo parsing ────────────────────────────────────────────────
393
    /// Parsed `StreamsInfo`: where the packed data lives and how it is
    /// grouped into folders.
    struct StreamsInfo {
        /// Offset of the first packed stream, relative to the end of the
        /// 32-byte signature header (`build_runs` adds 32 to it).
        pack_pos: u64,
        /// Size of each packed stream, in order.
        pack_sizes: Vec<u64>,
        /// Folder definitions, including their substream layout.
        folders: Vec<Folder>,
    }
399
400    fn parse_streams_info(c: &mut Cur) -> Result<StreamsInfo> {
401        let mut pack_pos = 0u64;
402        let mut pack_sizes = Vec::new();
403        let mut folders: Vec<Folder> = Vec::new();
404
405        let mut id = c.byte()?;
406        if id == K_PACK_INFO {
407            pack_pos = c.num()?;
408            let n = c.bounded_count()?;
409            loop {
410                let pid = c.byte()?;
411                if pid == K_SIZE {
412                    pack_sizes = (0..n).map(|_| c.num()).collect::<Result<_>>()?;
413                } else if pid == 0 {
414                    break;
415                } else {
416                    // skip kCRC etc. inside packinfo — not expected, bail.
417                    return Err(Error::InvalidImage(
418                        "7z: unexpected packinfo property".into(),
419                    ));
420                }
421            }
422            id = c.byte()?;
423        }
424        if id == K_UNPACK_INFO {
425            let fid = c.byte()?;
426            if fid != K_FOLDER {
427                return Err(Error::InvalidImage("7z: expected kFolder".into()));
428            }
429            let num_folders = c.bounded_count()?;
430            let external = c.byte()?;
431            if external != 0 {
432                return Err(Error::Unsupported(
433                    "7z: external folder definitions not supported".into(),
434                ));
435            }
436            for _ in 0..num_folders {
437                folders.push(parse_folder(c)?);
438            }
439            // kCodersUnpackSize: one size per output stream across all folders.
440            let usid = c.byte()?;
441            if usid != K_CODERS_UNPACK_SIZE {
442                return Err(Error::InvalidImage("7z: expected kCodersUnpackSize".into()));
443            }
444            for f in folders.iter_mut() {
445                let n_out = f.total_out() as usize;
446                // Each output size is a `num()` consuming ≥1 header byte.
447                if n_out > c.remaining() {
448                    return Err(Error::InvalidImage(
449                        "7z: coder unpack-size count exceeds header size".into(),
450                    ));
451                }
452                f.unpack_sizes = (0..n_out).map(|_| c.num()).collect::<Result<_>>()?;
453            }
454            // Optional kCRC then kEnd.
455            loop {
456                let pid = c.byte()?;
457                if pid == 0 {
458                    break;
459                } else if pid == K_CRC {
460                    let defined = c.bits_all_defined(folders.len())?;
461                    let ndef = defined.iter().filter(|&&d| d).count();
462                    c.bytes(ndef * 4)?; // skip CRCs
463                } else {
464                    return Err(Error::InvalidImage(
465                        "7z: unexpected unpackinfo property".into(),
466                    ));
467                }
468            }
469            id = c.byte()?;
470        }
471
472        // Default: one substream per folder, sized to the folder's unpack size.
473        for f in folders.iter_mut() {
474            f.num_substreams = 1;
475            f.substream_sizes = vec![f.unpack_size()];
476        }
477
478        if id == K_SUBSTREAMS_INFO {
479            parse_substreams_info(c, &mut folders)?;
480            id = c.byte()?;
481        }
482        if id != 0 {
483            return Err(Error::InvalidImage(
484                "7z: expected kEnd of StreamsInfo".into(),
485            ));
486        }
487        Ok(StreamsInfo {
488            pack_pos,
489            pack_sizes,
490            folders,
491        })
492    }
493
494    fn parse_folder(c: &mut Cur) -> Result<Folder> {
495        let num_coders = c.bounded_count()?;
496        let mut coders = Vec::with_capacity(num_coders);
497        for _ in 0..num_coders {
498            let flag = c.byte()?;
499            let id_size = (flag & 0x0F) as usize;
500            let id = c.bytes(id_size)?.to_vec();
501            let (n_in, n_out) = if flag & 0x10 != 0 {
502                (c.num()?, c.num()?)
503            } else {
504                (1, 1)
505            };
506            let attr = if flag & 0x20 != 0 {
507                let ps = c.usize_num()?;
508                c.bytes(ps)?.to_vec()
509            } else {
510                Vec::new()
511            };
512            coders.push(Coder {
513                id,
514                n_in,
515                n_out,
516                attr,
517            });
518        }
519        let total_out: u64 = coders.iter().map(|c| c.n_out).sum();
520        let total_in: u64 = coders.iter().map(|c| c.n_in).sum();
521        // A folder with no output streams is malformed; the bind-pair count
522        // is `total_out - 1`, which would underflow (panic in debug) here.
523        if total_out == 0 {
524            return Err(Error::InvalidImage(
525                "7z: folder has no output streams".into(),
526            ));
527        }
528        let num_bind_pairs = total_out - 1;
529        // Each bind pair is two `num()`s (≥1 byte each); reject an inflated
530        // count before looping over attacker-controlled iterations.
531        if num_bind_pairs > c.remaining() as u64 {
532            return Err(Error::InvalidImage(
533                "7z: bind-pair count exceeds header size".into(),
534            ));
535        }
536        for _ in 0..num_bind_pairs {
537            c.num()?; // in index
538            c.num()?; // out index
539        }
540        let num_packed = total_in.checked_sub(num_bind_pairs).ok_or_else(|| {
541            Error::InvalidImage("7z: bind pairs exceed coder input streams".into())
542        })?;
543        if num_packed > 1 {
544            for _ in 0..num_packed {
545                c.num()?; // packed stream index
546            }
547        }
548        Ok(Folder {
549            coders,
550            num_bind_pairs,
551            unpack_sizes: Vec::new(),
552            num_substreams: 1,
553            substream_sizes: Vec::new(),
554        })
555    }
556
557    fn parse_substreams_info(c: &mut Cur, folders: &mut [Folder]) -> Result<()> {
558        let mut id = c.byte()?;
559        if id == K_NUM_UNPACK_STREAM {
560            for f in folders.iter_mut() {
561                let n = c.num()?;
562                // Each substream beyond the first costs a `num()` size entry
563                // (≥1 header byte); reject a wildly inflated count up front so
564                // the later `with_capacity(n)` can't be driven to OOM.
565                if n > c.remaining() as u64 + 1 {
566                    return Err(Error::InvalidImage(
567                        "7z: substream count exceeds header size".into(),
568                    ));
569                }
570                f.num_substreams = n;
571            }
572            id = c.byte()?;
573        }
574        // kSize: for each folder with >1 substream, all-but-last sizes; the
575        // last is the remainder of the folder's unpack size.
576        // (When num_substreams == 1, the single size IS the folder size.)
577        for f in folders.iter_mut() {
578            let total = f.unpack_size();
579            let n = f.num_substreams;
580            if n == 0 {
581                f.substream_sizes = Vec::new();
582                continue;
583            }
584            let mut sizes = Vec::with_capacity(n as usize);
585            let mut sum = 0u64;
586            if id == K_SIZE {
587                for _ in 0..n - 1 {
588                    let s = c.num()?;
589                    sum += s;
590                    sizes.push(s);
591                }
592            }
593            sizes.push(total.saturating_sub(sum));
594            f.substream_sizes = sizes;
595        }
596        if id == K_SIZE {
597            id = c.byte()?;
598        }
599        // Skip an optional kCRC then kEnd.
600        loop {
601            if id == 0 {
602                break;
603            } else if id == K_CRC {
604                let total_streams: usize = folders.iter().map(|f| f.num_substreams as usize).sum();
605                let defined = c.bits_all_defined(total_streams)?;
606                let ndef = defined.iter().filter(|&&d| d).count();
607                c.bytes(ndef * 4)?;
608            } else {
609                return Err(Error::InvalidImage(
610                    "7z: unexpected substreams property".into(),
611                ));
612            }
613            id = c.byte()?;
614        }
615        Ok(())
616    }
617
618    // ─── single-coder folder decode ─────────────────────────────────────────
619
620    /// Build a reader over a folder's whole decompressed output. Only
621    /// single-coder Copy / LZMA / BZip2 / Deflate folders are supported.
622    fn folder_output_reader<'a>(
623        dev: &'a mut dyn BlockDevice,
624        run: &FolderRun,
625    ) -> Result<Box<dyn Read + 'a>> {
626        let packed = BoundedDevReader::new(dev, run.pack_offset, run.pack_size);
627        match run.coder_id.as_slice() {
628            // Copy.
629            [0x00] => Ok(Box::new(packed)),
630            // LZMA: synthesize a `.lzma`-alone header (5 props + 8-byte size)
631            // from the coder attributes + folder unpack size, then decode.
632            [0x03, 0x01, 0x01] => {
633                if run.coder_attr.len() < 5 {
634                    return Err(Error::InvalidImage("7z: LZMA props too short".into()));
635                }
636                let mut header = Vec::with_capacity(13);
637                header.extend_from_slice(&run.coder_attr[..5]);
638                header.extend_from_slice(&run.unpack_size.to_le_bytes());
639                let framed = io::Cursor::new(header).chain(packed);
640                Ok(Box::new(compcol::io::DecoderReader::new(
641                    framed,
642                    compcol::lzma::Lzma::decoder(),
643                )))
644            }
645            // BZip2 (standard .bz2 stream).
646            [0x04, 0x02, 0x02] => Ok(Box::new(compcol::io::DecoderReader::new(
647                packed,
648                compcol::bzip2::Bzip2::decoder(),
649            ))),
650            // Deflate (raw).
651            [0x04, 0x01, 0x08] => Ok(Box::new(compcol::io::DecoderReader::new(
652                packed,
653                compcol::deflate::Deflate::decoder(),
654            ))),
655            other => Err(Error::Unsupported(format!(
656                "7z: coder {} not supported",
657                hex(other)
658            ))),
659        }
660    }
661
662    fn hex(b: &[u8]) -> String {
663        b.iter().map(|x| format!("{x:02x}")).collect()
664    }
665
666    impl Inner {
667        pub fn lookup(&self, path: &Path) -> Result<Lookup> {
668            let s = path
669                .to_str()
670                .ok_or_else(|| Error::InvalidArgument("7z: non-UTF-8 path".into()))?;
671            let key = crate::fs::archive::tree::normalise_path(s);
672            Ok(match self.files.get(&key) {
673                Some(Entry::File(loc)) => Lookup::File(loc.clone()),
674                Some(Entry::Unsupported(r)) => Lookup::Unsupported(r.clone()),
675                None => Lookup::NotRegular,
676            })
677        }
678
679        pub fn open_file<'a>(
680            &self,
681            dev: &'a mut dyn BlockDevice,
682            loc: &FileLoc,
683        ) -> Result<Box<dyn Read + 'a>> {
684            let run = &self.folders[loc.folder];
685            if let Some(reason) = &run.decodable {
686                return Err(Error::Unsupported(reason.clone()));
687            }
688            let mut r = folder_output_reader(dev, run)?;
689            skip_exact(&mut *r, loc.off)?;
690            Ok(Box::new(LimitReader {
691                inner: r,
692                remaining: loc.len,
693            }))
694        }
695    }
696
697    // ─── top-level scan ─────────────────────────────────────────────────────
698
699    pub fn scan(dev: &mut dyn BlockDevice) -> Result<Parsed> {
700        let dev_len = dev.total_size();
701        let sig = read_at(dev, 0, 32)?;
702        if &sig[0..6] != b"7z\xBC\xAF\x27\x1C" {
703            return Err(Error::InvalidImage("7z: bad signature".into()));
704        }
705        let next_off = u64::from_le_bytes(sig[12..20].try_into().unwrap());
706        let next_size = u64::from_le_bytes(sig[20..28].try_into().unwrap());
707        let header_at = 32u64
708            .checked_add(next_off)
709            .ok_or_else(|| Error::InvalidImage("7z: header offset overflow".into()))?;
710        if next_size == 0 {
711            // Empty archive.
712            return Ok(Parsed {
713                index: ArchiveIndex::new("7z"),
714                inner: Inner {
715                    folders: Vec::new(),
716                    files: HashMap::new(),
717                },
718            });
719        }
720        if header_at + next_size > dev_len {
721            return Err(Error::InvalidImage("7z: header past end of file".into()));
722        }
723        let mut header = read_at(dev, header_at, next_size as usize)?;
724
725        // The end header may be an LZMA-packed `kEncodedHeader`; decode it to
726        // recover the real `kHeader` bytes.
727        if header.first() == Some(&K_ENCODED_HEADER) {
728            let mut c = Cur::new(&header[1..]);
729            let si = parse_streams_info(&mut c)?;
730            let runs = build_runs(&si, dev_len)?;
731            if runs.len() != 1 {
732                return Err(Error::Unsupported(
733                    "7z: multi-folder encoded header not supported".into(),
734                ));
735            }
736            if let Some(reason) = &runs[0].decodable {
737                return Err(Error::Unsupported(reason.clone()));
738            }
739            let mut r = folder_output_reader(dev, &runs[0])?;
740            // The folder declares its own unpack size; decoding to that size
741            // unbounded is a decompression bomb at open time. Cap the decoded
742            // header and reject anything that would exceed the cap.
743            let mut decoded = Vec::new();
744            (&mut r)
745                .take(MAX_DECODED_HEADER + 1)
746                .read_to_end(&mut decoded)
747                .map_err(Error::from)?;
748            if decoded.len() as u64 > MAX_DECODED_HEADER {
749                return Err(Error::InvalidImage(
750                    "7z: encoded header decodes too large".into(),
751                ));
752            }
753            header = decoded;
754        }
755
756        let mut index = ArchiveIndex::new("7z");
757        let mut files: HashMap<String, Entry> = HashMap::new();
758
759        let mut c = Cur::new(&header);
760        if c.byte()? != K_HEADER {
761            return Err(Error::InvalidImage("7z: expected kHeader".into()));
762        }
763
764        let mut streams: Option<StreamsInfo> = None;
765        let mut runs: Vec<FolderRun> = Vec::new();
766
767        // Walk the kHeader properties.
768        loop {
769            let id = c.byte()?;
770            match id {
771                0 => break,
772                K_MAIN_STREAMS_INFO => {
773                    let si = parse_streams_info(&mut c)?;
774                    runs = build_runs(&si, dev_len)?;
775                    streams = Some(si);
776                }
777                K_FILES_INFO => {
778                    build_files(&mut c, streams.as_ref(), &runs, &mut index, &mut files)?;
779                }
780                _ => {
781                    // kArchiveProperties / kAdditionalStreamsInfo etc. — these
782                    // appear before the streams we handle; bail rather than
783                    // misparse.
784                    return Err(Error::Unsupported(format!(
785                        "7z: header property {id:#x} not supported"
786                    )));
787                }
788            }
789        }
790
791        Ok(Parsed {
792            index,
793            inner: Inner {
794                folders: runs,
795                files,
796            },
797        })
798    }
799
800    /// Precompute each folder's pack offset/size + decodability.
801    fn build_runs(si: &StreamsInfo, dev_len: u64) -> Result<Vec<FolderRun>> {
802        let base = 32u64 + si.pack_pos;
803        // Prefix sums of pack sizes give each pack stream's offset.
804        let mut pack_off = Vec::with_capacity(si.pack_sizes.len() + 1);
805        let mut acc = base;
806        pack_off.push(acc);
807        for &s in &si.pack_sizes {
808            acc = acc
809                .checked_add(s)
810                .ok_or_else(|| Error::InvalidImage("7z: pack size overflow".into()))?;
811            pack_off.push(acc);
812        }
813
814        let mut runs = Vec::with_capacity(si.folders.len());
815        let mut pack_idx = 0usize;
816        for f in &si.folders {
817            let num_packed = f.num_packed_streams() as usize;
818            let single = f.coders.len() == 1 && num_packed == 1 && f.num_bind_pairs == 0;
819            let pack_offset = *pack_off.get(pack_idx).unwrap_or(&base);
820            // Sum the sizes of this folder's packed streams.
821            let mut pack_size = 0u64;
822            for k in 0..num_packed {
823                pack_size += *si.pack_sizes.get(pack_idx + k).unwrap_or(&0);
824            }
825            pack_idx += num_packed;
826
827            // Cumulative substream offsets within the folder output.
828            let mut sub_offsets = Vec::with_capacity(f.substream_sizes.len() + 1);
829            let mut o = 0u64;
830            sub_offsets.push(0);
831            for &s in &f.substream_sizes {
832                o += s;
833                sub_offsets.push(o);
834            }
835
836            let decodable = if !single {
837                Some("7z: multi-coder / filtered folder not supported".to_string())
838            } else if pack_offset + pack_size > dev_len {
839                Some("7z: folder pack data past end of file".to_string())
840            } else {
841                let id = f.coders[0].id.as_slice();
842                match id {
843                    [0x00] | [0x03, 0x01, 0x01] | [0x04, 0x02, 0x02] | [0x04, 0x01, 0x08] => None,
844                    _ => Some(format!("7z: coder {} not supported", hex(id))),
845                }
846            };
847
848            runs.push(FolderRun {
849                pack_offset,
850                pack_size,
851                unpack_size: f.unpack_size(),
852                coder_id: f.coders.first().map(|c| c.id.clone()).unwrap_or_default(),
853                coder_attr: f.coders.first().map(|c| c.attr.clone()).unwrap_or_default(),
854                sub_offsets,
855                decodable,
856            });
857        }
858        Ok(runs)
859    }
860
861    /// Parse FilesInfo and map stream-bearing files to folder substreams.
862    fn build_files(
863        c: &mut Cur,
864        streams: Option<&StreamsInfo>,
865        runs: &[FolderRun],
866        index: &mut ArchiveIndex,
867        files: &mut HashMap<String, Entry>,
868    ) -> Result<()> {
869        let num_files = c.bounded_count()?;
870        let mut empty_stream = vec![false; num_files];
871        let mut empty_file: Vec<bool> = Vec::new();
872        let mut names: Vec<String> = Vec::new();
873
874        loop {
875            let prop = c.byte()?;
876            if prop == 0 {
877                break;
878            }
879            let size = c.usize_num()?;
880            let end = c.p + size;
881            match prop {
882                K_EMPTY_STREAM => {
883                    empty_stream = c.bits(num_files)?;
884                }
885                K_EMPTY_FILE => {
886                    let n_empty = empty_stream.iter().filter(|&&e| e).count();
887                    empty_file = c.bits(n_empty)?;
888                }
889                K_NAME => {
890                    let external = c.byte()?;
891                    if external != 0 {
892                        return Err(Error::Unsupported(
893                            "7z: external names not supported".into(),
894                        ));
895                    }
896                    let raw = c.bytes(end - c.p)?;
897                    names = decode_names(raw, num_files)?;
898                }
899                _ => {
900                    // kMTime / kWinAttributes / kCTime / kATime / kDummy / …
901                    c.bytes(end - c.p)?;
902                }
903            }
904            // Resync to the declared property end (defensive).
905            c.p = end;
906        }
907
908        if names.len() != num_files {
909            return Err(Error::InvalidImage("7z: name count mismatch".into()));
910        }
911
912        // Build a flat list of substreams (folder, offset, len) in order.
913        struct Sub {
914            folder: usize,
915            off: u64,
916            len: u64,
917        }
918        let mut subs: Vec<Sub> = Vec::new();
919        if let Some(si) = streams {
920            for (fi, f) in si.folders.iter().enumerate() {
921                let run = &runs[fi];
922                for (si2, &len) in f.substream_sizes.iter().enumerate() {
923                    subs.push(Sub {
924                        folder: fi,
925                        off: run.sub_offsets[si2],
926                        len,
927                    });
928                }
929            }
930        }
931
932        let mut empty_idx = 0usize;
933        let mut sub_idx = 0usize;
934        for i in 0..num_files {
935            let path = crate::fs::archive::tree::normalise_path(&names[i]);
936            if empty_stream[i] {
937                let is_empty_file = empty_file.get(empty_idx).copied().unwrap_or(false);
938                empty_idx += 1;
939                if is_empty_file {
940                    if path != "/" {
941                        let mut e = ArchiveEntry::regular(
942                            path.clone(),
943                            crate::fs::archive::DataLocator {
944                                offset: 0,
945                                compressed_len: 0,
946                                uncompressed_len: 0,
947                                method: crate::fs::archive::Method::Stored,
948                            },
949                        );
950                        e.kind = EntryKind::Regular;
951                        index.push(e);
952                        // Empty file: a zero-length stored member.
953                        files.insert(
954                            path,
955                            Entry::File(FileLoc {
956                                folder: usize::MAX,
957                                off: 0,
958                                len: 0,
959                            }),
960                        );
961                    }
962                } else if path != "/" {
963                    index.push(ArchiveEntry::dir(path));
964                }
965            } else {
966                let sub = subs
967                    .get(sub_idx)
968                    .ok_or_else(|| Error::InvalidImage("7z: more files than substreams".into()))?;
969                sub_idx += 1;
970                if path != "/" {
971                    let mut e = ArchiveEntry::regular(
972                        path.clone(),
973                        crate::fs::archive::DataLocator {
974                            offset: 0,
975                            compressed_len: 0,
976                            uncompressed_len: sub.len,
977                            method: crate::fs::archive::Method::Stored,
978                        },
979                    );
980                    e.kind = EntryKind::Regular;
981                    index.push(e);
982
983                    let run = &runs[sub.folder];
984                    if let Some(reason) = &run.decodable {
985                        files.insert(path, Entry::Unsupported(reason.clone()));
986                    } else {
987                        files.insert(
988                            path,
989                            Entry::File(FileLoc {
990                                folder: sub.folder,
991                                off: sub.off,
992                                len: sub.len,
993                            }),
994                        );
995                    }
996                }
997            }
998        }
999        Ok(())
1000    }
1001
1002    /// UTF-16LE, NUL-terminated, back-slash separated names → forward-slash
1003    /// normalised component strings.
1004    fn decode_names(raw: &[u8], num_files: usize) -> Result<Vec<String>> {
1005        let mut out = Vec::with_capacity(num_files);
1006        let mut units: Vec<u16> = Vec::new();
1007        let mut i = 0;
1008        while i + 1 < raw.len() {
1009            let u = u16::from_le_bytes([raw[i], raw[i + 1]]);
1010            i += 2;
1011            if u == 0 {
1012                let s: String = String::from_utf16_lossy(&units)
1013                    .chars()
1014                    .map(|ch| if ch == '\\' { '/' } else { ch })
1015                    .collect();
1016                out.push(s);
1017                units.clear();
1018            } else {
1019                units.push(u);
1020            }
1021        }
1022        Ok(out)
1023    }
1024
1025    /// Read and discard exactly `n` bytes.
1026    fn skip_exact(r: &mut dyn Read, mut n: u64) -> Result<()> {
1027        let mut scratch = [0u8; 64 * 1024];
1028        while n > 0 {
1029            let want = n.min(scratch.len() as u64) as usize;
1030            let got = r.read(&mut scratch[..want]).map_err(Error::from)?;
1031            if got == 0 {
1032                return Err(Error::InvalidImage("7z: folder stream ended early".into()));
1033            }
1034            n -= got as u64;
1035        }
1036        Ok(())
1037    }
1038
1039    /// Caps an owned boxed reader to `remaining` bytes.
1040    pub struct LimitReader<'a> {
1041        inner: Box<dyn Read + 'a>,
1042        remaining: u64,
1043    }
1044    impl Read for LimitReader<'_> {
1045        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1046            if self.remaining == 0 {
1047                return Ok(0);
1048            }
1049            let want = buf.len().min(self.remaining as usize);
1050            let n = self.inner.read(&mut buf[..want])?;
1051            self.remaining -= n as u64;
1052            Ok(n)
1053        }
1054    }
1055
1056    /// Wrap decoded bytes as a seekable [`FileReadHandle`].
1057    pub fn mem_handle(bytes: Vec<u8>) -> MemHandle {
1058        let len = bytes.len() as u64;
1059        MemHandle {
1060            cur: io::Cursor::new(bytes),
1061            len,
1062        }
1063    }
1064    pub struct MemHandle {
1065        cur: io::Cursor<Vec<u8>>,
1066        len: u64,
1067    }
1068    impl Read for MemHandle {
1069        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1070            self.cur.read(buf)
1071        }
1072    }
1073    impl io::Seek for MemHandle {
1074        fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
1075            self.cur.seek(pos)
1076        }
1077    }
1078    impl crate::fs::FileReadHandle for MemHandle {
1079        fn len(&self) -> u64 {
1080            self.len
1081        }
1082    }
1083}
1084
#[cfg(all(test, feature = "sevenz"))]
mod tests {
    use std::io::Read;
    use std::path::Path;

    use super::*;
    use crate::block::MemoryBackend;

    /// Reference contents of the `hello.txt` fixture member.
    fn hello() -> Vec<u8> {
        b"Hello, 7z reader!\n".repeat(3)
    }

    /// Reference contents of the `lorem.txt` fixture member.
    fn lorem() -> Vec<u8> {
        b"Lorem ipsum dolor sit amet. ".repeat(40)
    }

    /// Build an in-memory block device holding `bytes` (at least one byte long).
    fn dev_from(bytes: &[u8]) -> MemoryBackend {
        let capacity = bytes.len().max(1) as u64;
        let mut backend = MemoryBackend::new(capacity);
        backend.write_at(0, bytes).unwrap();
        backend
    }

    /// Open `arc` as a 7z archive and read the whole member at `path`.
    fn read_file(arc: &[u8], path: &str) -> Result<Vec<u8>> {
        let mut dev = dev_from(arc);
        let mut fs = SevenZFs::open(&mut dev)?;
        let mut handle = fs.read_file(&mut dev, Path::new(path))?;
        let mut contents = Vec::new();
        handle
            .read_to_end(&mut contents)
            .map_err(crate::Error::from)?;
        Ok(contents)
    }

    /// Names of the entries listed at the archive root.
    fn names(arc: &[u8]) -> Vec<String> {
        let mut dev = dev_from(arc);
        let mut fs = SevenZFs::open(&mut dev).unwrap();
        let entries = fs.list(&mut dev, Path::new("/")).unwrap();
        entries.iter().map(|entry| entry.name.clone()).collect()
    }

    /// Assert that both fixture members decode to their reference contents.
    fn assert_members_decode(arc: &[u8]) {
        assert_eq!(read_file(arc, "/hello.txt").unwrap(), hello());
        assert_eq!(read_file(arc, "/lorem.txt").unwrap(), lorem());
    }

    /// Assert that both fixture members show up in the root listing.
    fn assert_members_listed(arc: &[u8]) {
        let n = names(arc);
        let all_listed = ["hello.txt", "lorem.txt"]
            .iter()
            .all(|want| n.iter().any(|x| x == *want));
        assert!(all_listed, "{n:?}");
    }

    /// Copy folders with an uncompressed (`-mhc=off`) header.
    #[test]
    fn copy_uncompressed_header() {
        assert_members_decode(include_bytes!("testdata/copy_nohc.7z"));
    }

    /// Copy folders with a compressed (LZMA `kEncodedHeader`) end header —
    /// exercises the header-decode bootstrap.
    #[test]
    fn copy_compressed_header() {
        assert_members_decode(include_bytes!("testdata/copy_hc.7z"));
    }

    /// LZMA, solid (both files in one folder) with a compressed header —
    /// exercises LZMA decode + substream slicing.
    #[test]
    fn lzma_solid() {
        let arc = include_bytes!("testdata/lzma.7z");
        assert_members_decode(arc);
        assert_members_listed(arc);
    }

    /// BZip2 and Deflate single-coder folders decode via compcol.
    #[test]
    fn bzip2_and_deflate() {
        let fixtures: [&[u8]; 2] = [
            &include_bytes!("testdata/bzip2.7z")[..],
            &include_bytes!("testdata/deflate.7z")[..],
        ];
        for arc in fixtures {
            assert_members_decode(arc);
        }
    }

    /// LZMA2 (the 7-Zip default) lists correctly but reads `Unsupported`
    /// pending a raw-LZMA2 entry point in compcol.
    #[test]
    fn lzma2_lists_but_read_is_unsupported() {
        let arc = include_bytes!("testdata/lzma2.7z");
        assert_members_listed(arc);
        let err = read_file(arc, "/hello.txt").unwrap_err();
        assert!(
            matches!(err, crate::Error::Unsupported(_)),
            "expected Unsupported, got {err:?}"
        );
    }

    /// Cross-check against the reference `7z` tool when installed.
    #[test]
    fn matches_7z_reference() {
        use std::io::Write;
        use std::process::Command;

        // Without a runnable `7z` binary there is nothing to compare against.
        if Command::new("7z").arg("--help").output().is_err() {
            eprintln!("skipping: 7z not installed");
            return;
        }

        let arc = include_bytes!("testdata/lzma.7z");
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        tmp.as_file_mut().write_all(arc).unwrap();
        let archive_path = tmp.path().to_str().unwrap();

        for name in ["hello.txt", "lorem.txt"] {
            let out = Command::new("7z")
                .args(["x", "-so", archive_path, name])
                .output()
                .unwrap();
            // `7z x -so` behaves inconsistently across platforms/builds; only
            // cross-check when the reference tool actually produced the file.
            if !out.status.success() {
                eprintln!("skipping: `7z x -so` unavailable here");
                return;
            }
            assert_eq!(
                read_file(arc, &format!("/{name}")).unwrap(),
                out.stdout,
                "reader vs 7z mismatch for {name}"
            );
        }
    }
}