sevenz_rust2/
reader.rs

1use std::{
2    cell::RefCell,
3    collections::HashMap,
4    fs::File,
5    io,
6    io::{Read, Seek, SeekFrom},
7    num::NonZeroUsize,
8    rc::Rc,
9};
10
11use crc32fast::Hasher;
12use lzma_rust2::filter::bcj2::Bcj2Reader;
13
14use crate::{
15    ByteReader, Password, archive::*, bitset::BitSet, block::*, decoder::add_decoder, error::Error,
16};
17
/// Memory limit handed to `add_decoder` when the encoded header is decoded.
// NOTE(review): going by the name the unit is KiB, and `usize::MAX / 1024` is
// presumably the largest KiB count whose byte equivalent still fits in a
// `usize` - confirm against how `add_decoder` interprets the value.
const MAX_MEM_LIMIT_KB: usize = usize::MAX / 1024;
19
/// A [`Read`] adapter that hands out at most a fixed number of bytes from the
/// wrapped reader and reports end of stream afterwards.
pub struct BoundedReader<R: Read> {
    /// The wrapped reader.
    inner: R,
    /// Number of bytes that may still be handed out.
    remain: usize,
}

impl<R: Read> BoundedReader<R> {
    /// Wraps `inner` so that no more than `max_size` bytes can be read from it.
    pub fn new(inner: R, max_size: usize) -> Self {
        BoundedReader {
            inner,
            remain: max_size,
        }
    }
}

impl<R: Read> Read for BoundedReader<R> {
    /// Reads into `buf`, never requesting more than the remaining budget from
    /// the wrapped reader. Returns `Ok(0)` once the budget is used up.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self.remain {
            0 => Ok(0),
            budget => {
                // Only expose as much of the buffer as the budget allows.
                let window = budget.min(buf.len());
                let count = self.inner.read(&mut buf[..window])?;
                self.remain = budget - count;
                Ok(count)
            }
        }
    }
}
45
/// A special reader that shares its inner reader with other instances and
/// needs to re-seek on every read operation.
///
/// Every instance keeps its own cursor (`cur`) and a window `bounds` of the
/// shared reader, both expressed as absolute offsets in the shared reader.
/// Reading stops once the cursor reaches `bounds.1`.
#[derive(Debug)]
pub(crate) struct SharedBoundedReader<'a, R> {
    /// The shared underlying reader.
    inner: Rc<RefCell<&'a mut R>>,
    /// Absolute position of this instance inside the shared reader.
    cur: u64,
    /// Absolute `(start, end)` offsets of the window; `end` is exclusive.
    bounds: (u64, u64),
}

impl<'a, R> Clone for SharedBoundedReader<'a, R> {
    /// Creates another handle onto the same shared reader with the same
    /// window and the same current position.
    fn clone(&self) -> Self {
        Self {
            inner: Rc::clone(&self.inner),
            cur: self.cur,
            bounds: self.bounds,
        }
    }
}

impl<'a, R: Read + Seek> Seek for SharedBoundedReader<'a, R> {
    /// Moves the cursor: `Start` is relative to `bounds.0`, `End` is relative
    /// to `bounds.1` and `Current` is relative to the current cursor.
    ///
    /// The target is not clamped to the window. The returned value is whatever
    /// the shared reader's own `seek` reports, i.e. a position in the shared
    /// reader.
    ///
    /// # Errors
    /// Fails with the message `SeekBeforeStart` when the target position
    /// cannot be represented as a `u64` (it would be negative or overflow).
    /// The cursor is left untouched in that case.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        // Checked arithmetic instead of `as i64` casts: a huge unsigned offset
        // must not wrap around into a small, seemingly valid position.
        let target = match pos {
            SeekFrom::Start(offset) => self.bounds.0.checked_add(offset),
            SeekFrom::End(offset) => self.bounds.1.checked_add_signed(offset),
            SeekFrom::Current(offset) => self.cur.checked_add_signed(offset),
        }
        .ok_or_else(|| io::Error::other("SeekBeforeStart"))?;

        self.cur = target;
        self.inner.borrow_mut().seek(SeekFrom::Start(self.cur))
    }
}

impl<'a, R: Read + Seek> Read for SharedBoundedReader<'a, R> {
    /// Reads from the shared reader at this instance's cursor, never past
    /// `bounds.1`. Returns `Ok(0)` once the cursor has reached `bounds.1`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.cur >= self.bounds.1 {
            return Ok(0);
        }

        let mut inner = self.inner.borrow_mut();

        // Other instances may have moved the shared reader in the meantime.
        inner.seek(SeekFrom::Start(self.cur))?;

        // If the remaining window does not fit into `usize` (32-bit targets),
        // the buffer length is the effective limit; a truncating cast could
        // turn it into 0 and fake an end of stream.
        let remaining = self.bounds.1 - self.cur;
        let bound = usize::try_from(remaining).map_or(buf.len(), |r| r.min(buf.len()));
        let size = inner.read(&mut buf[..bound])?;
        self.cur += size as u64;
        Ok(size)
    }
}

impl<'a, R: Read + Seek> SharedBoundedReader<'a, R> {
    /// Creates a reader over the window `bounds` of `inner`, positioned at
    /// the start of the window.
    fn new(inner: Rc<RefCell<&'a mut R>>, bounds: (u64, u64)) -> Self {
        Self {
            inner,
            cur: bounds.0,
            bounds,
        }
    }
}
106
107struct Crc32VerifyingReader<R> {
108    inner: R,
109    crc_digest: Hasher,
110    expected_value: u64,
111    remaining: i64,
112}
113
114impl<R: Read> Crc32VerifyingReader<R> {
115    fn new(inner: R, remaining: usize, expected_value: u64) -> Self {
116        Self {
117            inner,
118            crc_digest: Hasher::new(),
119            expected_value,
120            remaining: remaining as i64,
121        }
122    }
123}
124
125impl<R: Read> Read for Crc32VerifyingReader<R> {
126    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
127        if self.remaining <= 0 {
128            return Ok(0);
129        }
130        let size = self.inner.read(buf)?;
131        if size > 0 {
132            self.remaining -= size as i64;
133            self.crc_digest.update(&buf[..size]);
134        }
135        if self.remaining <= 0 {
136            let d = std::mem::replace(&mut self.crc_digest, Hasher::new()).finalize();
137            if d as u64 != self.expected_value {
138                return Err(std::io::Error::other(Error::ChecksumVerificationFailed));
139            }
140        }
141        Ok(size)
142    }
143}
144
145impl Archive {
146    /// Open 7z file under specified `path`.
147    #[inline]
148    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Archive, Error> {
149        Self::open_with_password(path, &Password::empty())
150    }
151
152    /// Open an encrypted 7z file under specified `path` with `password`.
153    ///
154    /// # Parameters
155    /// - `reader`   - the path to the 7z file
156    /// - `password` - archive password encoded in utf16 little endian
157    #[inline]
158    pub fn open_with_password(
159        path: impl AsRef<std::path::Path>,
160        password: &Password,
161    ) -> Result<Archive, Error> {
162        let mut file = File::open(path)?;
163        Self::read(&mut file, password)
164    }
165
166    /// Read 7z file archive info use the specified `reader`.
167    ///
168    /// # Parameters
169    /// - `reader`   - the reader of the 7z filr archive
170    /// - `password` - archive password encoded in utf16 little endian
171    ///
172    /// # Example
173    ///
174    /// ```no_run
175    /// use std::{
176    ///     fs::File,
177    ///     io::{Read, Seek},
178    /// };
179    ///
180    /// use sevenz_rust2::*;
181    ///
182    /// let mut reader = File::open("example.7z").unwrap();
183    ///
184    /// let password = Password::from("the password");
185    /// let archive = Archive::read(&mut reader, &password).unwrap();
186    ///
187    /// for entry in &archive.files {
188    ///     println!("{}", entry.name());
189    /// }
190    /// ```
191    pub fn read<R: Read + Seek>(reader: &mut R, password: &Password) -> Result<Archive, Error> {
192        let reader_len = reader.seek(SeekFrom::End(0))?;
193        reader.seek(SeekFrom::Start(0))?;
194
195        let mut signature = [0; 6];
196        reader.read_exact(&mut signature)?;
197        if signature != SEVEN_Z_SIGNATURE {
198            return Err(Error::BadSignature(signature));
199        }
200        let mut versions = [0; 2];
201        reader.read_exact(&mut versions)?;
202        let version_major = versions[0];
203        let version_minor = versions[1];
204        if version_major != 0 {
205            return Err(Error::UnsupportedVersion {
206                major: version_major,
207                minor: version_minor,
208            });
209        }
210
211        let start_header_crc = reader.read_u32()?;
212
213        let header_valid = if start_header_crc == 0 {
214            let current_position = reader.stream_position()?;
215            let mut buf = [0; 20];
216            reader.read_exact(&mut buf)?;
217            reader.seek(SeekFrom::Start(current_position))?;
218            buf.iter().any(|a| *a != 0)
219        } else {
220            true
221        };
222        if header_valid {
223            let start_header = Self::read_start_header(reader, start_header_crc)?;
224            Self::init_archive(reader, start_header, password, true, 1)
225        } else {
226            Self::try_to_locale_end_header(reader, reader_len, password, 1)
227        }
228    }
229
230    fn read_start_header<R: Read>(
231        reader: &mut R,
232        start_header_crc: u32,
233    ) -> Result<StartHeader, Error> {
234        let mut buf = [0; 20];
235        reader.read_exact(&mut buf)?;
236        let crc32 = crc32fast::hash(&buf);
237        if crc32 != start_header_crc {
238            return Err(Error::ChecksumVerificationFailed);
239        }
240        let mut buf_read = buf.as_slice();
241        let offset = buf_read.read_u64()?;
242
243        let size = buf_read.read_u64()?;
244        let crc = buf_read.read_u32()?;
245        Ok(StartHeader {
246            next_header_offset: offset,
247            next_header_size: size,
248            next_header_crc: crc as u64,
249        })
250    }
251
252    fn read_header<R: Read + Seek>(header: &mut R, archive: &mut Archive) -> Result<(), Error> {
253        let mut nid = header.read_u8()?;
254        if nid == K_ARCHIVE_PROPERTIES {
255            Self::read_archive_properties(header)?;
256            nid = header.read_u8()?;
257        }
258
259        if nid == K_ADDITIONAL_STREAMS_INFO {
260            return Err(Error::other("Additional streams unsupported"));
261        }
262        if nid == K_MAIN_STREAMS_INFO {
263            Self::read_streams_info(header, archive)?;
264            nid = header.read_u8()?;
265        }
266        if nid == K_FILES_INFO {
267            Self::read_files_info(header, archive)?;
268            nid = header.read_u8()?;
269        }
270        if nid != K_END {
271            return Err(Error::BadTerminatedHeader(nid));
272        }
273
274        Ok(())
275    }
276
277    fn read_archive_properties<R: Read + Seek>(header: &mut R) -> Result<(), Error> {
278        let mut nid = header.read_u8()?;
279        while nid != K_END {
280            let property_size = read_variable_usize(header, "propertySize")?;
281            header.seek(SeekFrom::Current(property_size as i64))?;
282            nid = header.read_u8()?;
283        }
284        Ok(())
285    }
286
287    fn try_to_locale_end_header<R: Read + Seek>(
288        reader: &mut R,
289        reader_len: u64,
290        password: &Password,
291        thread_count: u32,
292    ) -> Result<Self, Error> {
293        let search_limit = 1024 * 1024;
294        let prev_data_size = reader.stream_position()? + 20;
295        let size = reader_len;
296        let min_pos = if reader.stream_position()? + search_limit > size {
297            reader.stream_position()?
298        } else {
299            size - search_limit
300        };
301        let mut pos = reader_len - 1;
302        while pos > min_pos {
303            pos -= 1;
304
305            reader.seek(SeekFrom::Start(pos))?;
306            let nid = reader.read_u8()?;
307            if nid == K_ENCODED_HEADER || nid == K_HEADER {
308                let start_header = StartHeader {
309                    next_header_offset: pos - prev_data_size,
310                    next_header_size: reader_len - pos,
311                    next_header_crc: 0,
312                };
313                let result =
314                    Self::init_archive(reader, start_header, password, false, thread_count)?;
315
316                if !result.files.is_empty() {
317                    return Ok(result);
318                }
319            }
320        }
321        Err(Error::other(
322            "Start header corrupt and unable to guess end header",
323        ))
324    }
325
326    fn init_archive<R: Read + Seek>(
327        reader: &mut R,
328        start_header: StartHeader,
329        password: &Password,
330        verify_crc: bool,
331        thread_count: u32,
332    ) -> Result<Self, Error> {
333        if start_header.next_header_size > usize::MAX as u64 {
334            return Err(Error::other(format!(
335                "Cannot handle next_header_size {}",
336                start_header.next_header_size
337            )));
338        }
339
340        let next_header_size_int = start_header.next_header_size as usize;
341
342        reader.seek(SeekFrom::Start(
343            SIGNATURE_HEADER_SIZE + start_header.next_header_offset,
344        ))?;
345
346        let mut buf = vec![0; next_header_size_int];
347        reader.read_exact(&mut buf)?;
348        if verify_crc && crc32fast::hash(&buf) as u64 != start_header.next_header_crc {
349            return Err(Error::NextHeaderCrcMismatch);
350        }
351
352        let mut archive = Archive::default();
353        let mut buf_reader = buf.as_slice();
354        let mut nid = buf_reader.read_u8()?;
355        let mut header = if nid == K_ENCODED_HEADER {
356            let (mut out_reader, buf_size) = Self::read_encoded_header(
357                &mut buf_reader,
358                reader,
359                &mut archive,
360                password,
361                thread_count,
362            )?;
363            buf.clear();
364            buf.resize(buf_size, 0);
365            out_reader
366                .read_exact(&mut buf)
367                .map_err(|e| Error::bad_password(e, !password.is_empty()))?;
368            archive = Archive::default();
369            buf_reader = buf.as_slice();
370            nid = buf_reader.read_u8()?;
371            buf_reader
372        } else {
373            buf_reader
374        };
375        let mut header = std::io::Cursor::new(&mut header);
376        if nid == K_HEADER {
377            Self::read_header(&mut header, &mut archive)?;
378        } else {
379            return Err(Error::other("Broken or unsupported archive: no Header"));
380        }
381
382        archive.is_solid = archive
383            .blocks
384            .iter()
385            .any(|block| block.num_unpack_sub_streams > 1);
386
387        Ok(archive)
388    }
389
390    fn read_encoded_header<'r, R: Read, RI: 'r + Read + Seek>(
391        header: &mut R,
392        reader: &'r mut RI,
393        archive: &mut Archive,
394        password: &Password,
395        thread_count: u32,
396    ) -> Result<(Box<dyn Read + 'r>, usize), Error> {
397        Self::read_streams_info(header, archive)?;
398        let block = archive
399            .blocks
400            .first()
401            .ok_or(Error::other("no blocks, can't read encoded header"))?;
402        let first_pack_stream_index = 0;
403        let block_offset = SIGNATURE_HEADER_SIZE + archive.pack_pos;
404        if archive.pack_sizes.is_empty() {
405            return Err(Error::other("no packed streams, can't read encoded header"));
406        }
407
408        reader.seek(SeekFrom::Start(block_offset))?;
409        let coder_len = block.coders.len();
410        let unpack_size = block.get_unpack_size() as usize;
411        let pack_size = archive.pack_sizes[first_pack_stream_index] as usize;
412        let input_reader = BoundedReader::new(reader, pack_size);
413        let mut decoder: Box<dyn Read> = Box::new(input_reader);
414        let mut decoder = if coder_len > 0 {
415            for (index, coder) in block.ordered_coder_iter() {
416                if coder.num_in_streams != 1 || coder.num_out_streams != 1 {
417                    return Err(Error::other(
418                        "Multi input/output stream coders are not yet supported",
419                    ));
420                }
421                let next = add_decoder(
422                    decoder,
423                    block.get_unpack_size_at_index(index) as usize,
424                    coder,
425                    password,
426                    MAX_MEM_LIMIT_KB,
427                    thread_count,
428                )?;
429                decoder = Box::new(next);
430            }
431            decoder
432        } else {
433            decoder
434        };
435        if block.has_crc {
436            decoder = Box::new(Crc32VerifyingReader::new(decoder, unpack_size, block.crc));
437        }
438
439        Ok((decoder, unpack_size))
440    }
441
442    fn read_streams_info<R: Read>(header: &mut R, archive: &mut Archive) -> Result<(), Error> {
443        let mut nid = header.read_u8()?;
444        if nid == K_PACK_INFO {
445            Self::read_pack_info(header, archive)?;
446            nid = header.read_u8()?;
447        }
448
449        if nid == K_UNPACK_INFO {
450            Self::read_unpack_info(header, archive)?;
451            nid = header.read_u8()?;
452        } else {
453            archive.blocks.clear();
454        }
455        if nid == K_SUB_STREAMS_INFO {
456            Self::read_sub_streams_info(header, archive)?;
457            nid = header.read_u8()?;
458        }
459        if nid != K_END {
460            return Err(Error::BadTerminatedStreamsInfo(nid));
461        }
462
463        Ok(())
464    }
465
466    fn read_files_info<R: Read + Seek>(header: &mut R, archive: &mut Archive) -> Result<(), Error> {
467        let num_files = read_variable_usize(header, "num files")?;
468        let mut files: Vec<ArchiveEntry> = vec![Default::default(); num_files];
469
470        let mut is_empty_stream: Option<BitSet> = None;
471        let mut is_empty_file: Option<BitSet> = None;
472        let mut is_anti: Option<BitSet> = None;
473        loop {
474            let prop_type = header.read_u8()?;
475            if prop_type == 0 {
476                break;
477            }
478            let size = read_variable_u64(header)?;
479            match prop_type {
480                K_EMPTY_STREAM => {
481                    is_empty_stream = Some(read_bits(header, num_files)?);
482                }
483                K_EMPTY_FILE => {
484                    let n = if let Some(s) = &is_empty_stream {
485                        s.len()
486                    } else {
487                        return Err(Error::other(
488                            "Header format error: kEmptyStream must appear before kEmptyFile",
489                        ));
490                    };
491                    is_empty_file = Some(read_bits(header, n)?);
492                }
493                K_ANTI => {
494                    let n = if let Some(s) = is_empty_stream.as_ref() {
495                        s.len()
496                    } else {
497                        return Err(Error::other(
498                            "Header format error: kEmptyStream must appear before kEmptyFile",
499                        ));
500                    };
501                    is_anti = Some(read_bits(header, n)?);
502                }
503                K_NAME => {
504                    let external = header.read_u8()?;
505                    if external != 0 {
506                        return Err(Error::other("Not implemented:external != 0"));
507                    }
508                    if (size - 1) & 1 != 0 {
509                        return Err(Error::other("file names length invalid"));
510                    }
511
512                    let size = assert_usize(size, "file names length")?;
513                    // let mut names = vec![0u8; size - 1];
514                    // header.read_exact(&mut names)?;
515                    let names_reader = NamesReader::new(header, size - 1);
516
517                    let mut next_file = 0;
518                    for s in names_reader {
519                        files[next_file].name = s?;
520                        next_file += 1;
521                    }
522
523                    if next_file != files.len() {
524                        return Err(Error::other("Error parsing file names"));
525                    }
526                }
527                K_C_TIME => {
528                    let times_defined = read_all_or_bits(header, num_files)?;
529                    let external = header.read_u8()?;
530                    if external != 0 {
531                        return Err(Error::other(format!(
532                            "kCTime Unimplemented:external={external}"
533                        )));
534                    }
535                    for (i, file) in files.iter_mut().enumerate() {
536                        file.has_creation_date = times_defined.contains(i);
537                        if file.has_creation_date {
538                            file.creation_date = header.read_u64()?.into();
539                        }
540                    }
541                }
542                K_A_TIME => {
543                    let times_defined = read_all_or_bits(header, num_files)?;
544                    let external = header.read_u8()?;
545                    if external != 0 {
546                        return Err(Error::other(format!(
547                            "kATime Unimplemented:external={external}"
548                        )));
549                    }
550                    for (i, file) in files.iter_mut().enumerate() {
551                        file.has_access_date = times_defined.contains(i);
552                        if file.has_access_date {
553                            file.access_date = header.read_u64()?.into();
554                        }
555                    }
556                }
557                K_M_TIME => {
558                    let times_defined = read_all_or_bits(header, num_files)?;
559                    let external = header.read_u8()?;
560                    if external != 0 {
561                        return Err(Error::other(format!(
562                            "kMTime Unimplemented:external={external}"
563                        )));
564                    }
565                    for (i, file) in files.iter_mut().enumerate() {
566                        file.has_last_modified_date = times_defined.contains(i);
567                        if file.has_last_modified_date {
568                            file.last_modified_date = header.read_u64()?.into();
569                        }
570                    }
571                }
572                K_WIN_ATTRIBUTES => {
573                    let times_defined = read_all_or_bits(header, num_files)?;
574                    let external = header.read_u8()?;
575                    if external != 0 {
576                        return Err(Error::other(format!(
577                            "kWinAttributes Unimplemented:external={external}"
578                        )));
579                    }
580                    for (i, file) in files.iter_mut().enumerate() {
581                        file.has_windows_attributes = times_defined.contains(i);
582                        if file.has_windows_attributes {
583                            file.windows_attributes = header.read_u32()?;
584                        }
585                    }
586                }
587                K_START_POS => return Err(Error::other("kStartPos is unsupported, please report")),
588                K_DUMMY => {
589                    header.seek(SeekFrom::Current(size as i64))?;
590                }
591                _ => {
592                    header.seek(SeekFrom::Current(size as i64))?;
593                }
594            };
595        }
596
597        let mut non_empty_file_counter = 0;
598        let mut empty_file_counter = 0;
599        for (i, file) in files.iter_mut().enumerate() {
600            file.has_stream = is_empty_stream
601                .as_ref()
602                .map(|s| !s.contains(i))
603                .unwrap_or(true);
604            if file.has_stream {
605                let sub_stream_info = if let Some(s) = archive.sub_streams_info.as_ref() {
606                    s
607                } else {
608                    return Err(Error::other(
609                        "Archive contains file with streams but no subStreamsInfo",
610                    ));
611                };
612                file.is_directory = false;
613                file.is_anti_item = false;
614                file.has_crc = sub_stream_info.has_crc.contains(non_empty_file_counter);
615                file.crc = sub_stream_info.crcs[non_empty_file_counter];
616                file.size = sub_stream_info.unpack_sizes[non_empty_file_counter];
617                non_empty_file_counter += 1;
618            } else {
619                file.is_directory = if let Some(s) = &is_empty_file {
620                    !s.contains(empty_file_counter)
621                } else {
622                    true
623                };
624                file.is_anti_item = is_anti
625                    .as_ref()
626                    .map(|s| s.contains(empty_file_counter))
627                    .unwrap_or(false);
628                file.has_crc = false;
629                file.size = 0;
630                empty_file_counter += 1;
631            }
632        }
633        archive.files = files;
634
635        Self::calculate_stream_map(archive)?;
636        Ok(())
637    }
638
639    fn calculate_stream_map(archive: &mut Archive) -> Result<(), Error> {
640        let mut stream_map = StreamMap::default();
641
642        let mut next_block_pack_stream_index = 0;
643        let num_blocks = archive.blocks.len();
644        stream_map.block_first_pack_stream_index = vec![0; num_blocks];
645        for i in 0..num_blocks {
646            stream_map.block_first_pack_stream_index[i] = next_block_pack_stream_index;
647            next_block_pack_stream_index += archive.blocks[i].packed_streams.len();
648        }
649
650        let mut next_pack_stream_offset = 0;
651        let num_pack_sizes = archive.pack_sizes.len();
652        stream_map.pack_stream_offsets = vec![0; num_pack_sizes];
653        for i in 0..num_pack_sizes {
654            stream_map.pack_stream_offsets[i] = next_pack_stream_offset;
655            next_pack_stream_offset += archive.pack_sizes[i];
656        }
657
658        stream_map.block_first_file_index = vec![0; num_blocks];
659        stream_map.file_block_index = vec![None; archive.files.len()];
660        let mut next_block_index = 0;
661        let mut next_block_unpack_stream_index = 0;
662        for i in 0..archive.files.len() {
663            if !archive.files[i].has_stream && next_block_unpack_stream_index == 0 {
664                stream_map.file_block_index[i] = None;
665                continue;
666            }
667            if next_block_unpack_stream_index == 0 {
668                while next_block_index < archive.blocks.len() {
669                    stream_map.block_first_file_index[next_block_index] = i;
670                    if archive.blocks[next_block_index].num_unpack_sub_streams > 0 {
671                        break;
672                    }
673                    next_block_index += 1;
674                }
675                if next_block_index >= archive.blocks.len() {
676                    return Err(Error::other("Too few blocks in archive"));
677                }
678            }
679            stream_map.file_block_index[i] = Some(next_block_index);
680            if !archive.files[i].has_stream {
681                continue;
682            }
683
684            //set `compressed_size` of first file in block
685            if stream_map.block_first_file_index[next_block_index] == i {
686                let first_pack_stream_index =
687                    stream_map.block_first_pack_stream_index[next_block_index];
688                let pack_size = archive.pack_sizes[first_pack_stream_index];
689
690                archive.files[i].compressed_size = pack_size;
691            }
692
693            next_block_unpack_stream_index += 1;
694            if next_block_unpack_stream_index
695                >= archive.blocks[next_block_index].num_unpack_sub_streams
696            {
697                next_block_index += 1;
698                next_block_unpack_stream_index = 0;
699            }
700        }
701
702        archive.stream_map = stream_map;
703        Ok(())
704    }
705
706    fn read_pack_info<R: Read>(header: &mut R, archive: &mut Archive) -> Result<(), Error> {
707        archive.pack_pos = read_variable_u64(header)?;
708        let num_pack_streams = read_variable_usize(header, "num pack streams")?;
709        let mut nid = header.read_u8()?;
710        if nid == K_SIZE {
711            archive.pack_sizes = vec![0u64; num_pack_streams];
712            for i in 0..archive.pack_sizes.len() {
713                archive.pack_sizes[i] = read_variable_u64(header)?;
714            }
715            nid = header.read_u8()?;
716        }
717
718        if nid == K_CRC {
719            archive.pack_crcs_defined = read_all_or_bits(header, num_pack_streams)?;
720            archive.pack_crcs = vec![0; num_pack_streams];
721            for i in 0..num_pack_streams {
722                if archive.pack_crcs_defined.contains(i) {
723                    archive.pack_crcs[i] = header.read_u32()? as u64;
724                }
725            }
726            nid = header.read_u8()?;
727        }
728
729        if nid != K_END {
730            return Err(Error::BadTerminatedPackInfo(nid));
731        }
732
733        Ok(())
734    }
735    fn read_unpack_info<R: Read>(header: &mut R, archive: &mut Archive) -> Result<(), Error> {
736        let nid = header.read_u8()?;
737        if nid != K_FOLDER {
738            return Err(Error::other(format!("Expected kFolder, got {nid}")));
739        }
740        let num_blocks = read_variable_usize(header, "num blocks")?;
741
742        archive.blocks.reserve_exact(num_blocks);
743        let external = header.read_u8()?;
744        if external != 0 {
745            return Err(Error::ExternalUnsupported);
746        }
747
748        for _ in 0..num_blocks {
749            archive.blocks.push(Self::read_block(header)?);
750        }
751
752        let nid = header.read_u8()?;
753        if nid != K_CODERS_UNPACK_SIZE {
754            return Err(Error::other(format!(
755                "Expected kCodersUnpackSize, got {nid}"
756            )));
757        }
758
759        for block in archive.blocks.iter_mut() {
760            let tos = block.total_output_streams;
761            block.unpack_sizes.reserve_exact(tos);
762            for _ in 0..tos {
763                block.unpack_sizes.push(read_variable_u64(header)?);
764            }
765        }
766
767        let mut nid = header.read_u8()?;
768        if nid == K_CRC {
769            let crcs_defined = read_all_or_bits(header, num_blocks)?;
770            for i in 0..num_blocks {
771                if crcs_defined.contains(i) {
772                    archive.blocks[i].has_crc = true;
773                    archive.blocks[i].crc = header.read_u32()? as u64;
774                } else {
775                    archive.blocks[i].has_crc = false;
776                }
777            }
778            nid = header.read_u8()?;
779        }
780        if nid != K_END {
781            return Err(Error::BadTerminatedUnpackInfo);
782        }
783
784        Ok(())
785    }
786
787    fn read_sub_streams_info<R: Read>(header: &mut R, archive: &mut Archive) -> Result<(), Error> {
788        for block in archive.blocks.iter_mut() {
789            block.num_unpack_sub_streams = 1;
790        }
791        let mut total_unpack_streams = archive.blocks.len();
792
793        let mut nid = header.read_u8()?;
794        if nid == K_NUM_UNPACK_STREAM {
795            total_unpack_streams = 0;
796            for block in archive.blocks.iter_mut() {
797                let num_streams = read_variable_usize(header, "numStreams")?;
798                block.num_unpack_sub_streams = num_streams;
799                total_unpack_streams += num_streams;
800            }
801            nid = header.read_u8()?;
802        }
803
804        let mut sub_streams_info = SubStreamsInfo::default();
805        sub_streams_info
806            .unpack_sizes
807            .resize(total_unpack_streams, Default::default());
808        sub_streams_info
809            .has_crc
810            .reserve_len_exact(total_unpack_streams);
811        sub_streams_info.crcs = vec![0; total_unpack_streams];
812
813        let mut next_unpack_stream = 0;
814        for block in archive.blocks.iter() {
815            if block.num_unpack_sub_streams == 0 {
816                continue;
817            }
818            let mut sum = 0;
819            if nid == K_SIZE {
820                for _i in 0..block.num_unpack_sub_streams - 1 {
821                    let size = read_variable_u64(header)?;
822                    sub_streams_info.unpack_sizes[next_unpack_stream] = size;
823                    next_unpack_stream += 1;
824                    sum += size;
825                }
826            }
827            if sum > block.get_unpack_size() {
828                return Err(Error::other(
829                    "sum of unpack sizes of block exceeds total unpack size",
830                ));
831            }
832            // Calculate the last size from the total minus the sum of N-1 sizes.
833            sub_streams_info.unpack_sizes[next_unpack_stream] = block.get_unpack_size() - sum;
834            next_unpack_stream += 1;
835        }
836        if nid == K_SIZE {
837            nid = header.read_u8()?;
838        }
839
840        let mut num_digests = 0;
841        for block in archive.blocks.iter() {
842            if block.num_unpack_sub_streams != 1 || !block.has_crc {
843                num_digests += block.num_unpack_sub_streams;
844            }
845        }
846
847        if nid == K_CRC {
848            let has_missing_crc = read_all_or_bits(header, num_digests)?;
849            let mut missing_crcs = vec![0; num_digests];
850            for (i, missing_crc) in missing_crcs.iter_mut().enumerate() {
851                if has_missing_crc.contains(i) {
852                    *missing_crc = header.read_u32()? as u64;
853                }
854            }
855            let mut next_crc = 0;
856            let mut next_missing_crc = 0;
857            for block in archive.blocks.iter() {
858                if block.num_unpack_sub_streams == 1 && block.has_crc {
859                    sub_streams_info.has_crc.insert(next_crc);
860                    sub_streams_info.crcs[next_crc] = block.crc;
861                    next_crc += 1;
862                } else {
863                    for _i in 0..block.num_unpack_sub_streams {
864                        if has_missing_crc.contains(next_missing_crc) {
865                            sub_streams_info.has_crc.insert(next_crc);
866                        } else {
867                            sub_streams_info.has_crc.remove(next_crc);
868                        }
869                        sub_streams_info.crcs[next_crc] = missing_crcs[next_missing_crc];
870                        next_crc += 1;
871                        next_missing_crc += 1;
872                    }
873                }
874            }
875
876            nid = header.read_u8()?;
877        }
878
879        if nid != K_END {
880            return Err(Error::BadTerminatedSubStreamsInfo);
881        }
882
883        archive.sub_streams_info = Some(sub_streams_info);
884        Ok(())
885    }
886
887    fn read_block<R: Read>(header: &mut R) -> Result<Block, Error> {
888        let mut block = Block::default();
889
890        let num_coders = read_variable_usize(header, "num coders")?;
891        let mut coders = Vec::with_capacity(num_coders);
892        let mut total_in_streams = 0;
893        let mut total_out_streams = 0;
894        for _i in 0..num_coders {
895            let mut coder = Coder::default();
896            let bits = header.read_u8()?;
897            let id_size = bits & 0xF;
898            let is_simple = (bits & 0x10) == 0;
899            let has_attributes = (bits & 0x20) != 0;
900            let more_alternative_methods = (bits & 0x80) != 0;
901
902            coder.id_size = id_size as usize;
903
904            header.read_exact(coder.decompression_method_id_mut())?;
905            if is_simple {
906                coder.num_in_streams = 1;
907                coder.num_out_streams = 1;
908            } else {
909                coder.num_in_streams = read_variable_u64(header)?;
910                coder.num_out_streams = read_variable_u64(header)?;
911            }
912            total_in_streams += coder.num_in_streams;
913            total_out_streams += coder.num_out_streams;
914            if has_attributes {
915                let properties_size = read_variable_usize(header, "properties size")?;
916                let mut props = vec![0u8; properties_size];
917                header.read_exact(&mut props)?;
918                coder.properties = props;
919            }
920            coders.push(coder);
921            // would need to keep looping as above:
922            if more_alternative_methods {
923                return Err(Error::other(
924                    "Alternative methods are unsupported, please report. The reference implementation doesn't support them either.",
925                ));
926            }
927        }
928        block.coders = coders;
929        let total_in_streams = assert_usize(total_in_streams, "totalInStreams")?;
930        let total_out_streams = assert_usize(total_out_streams, "totalOutStreams")?;
931        block.total_input_streams = total_in_streams;
932        block.total_output_streams = total_out_streams;
933
934        if total_out_streams == 0 {
935            return Err(Error::other("Total output streams can't be 0"));
936        }
937        let num_bind_pairs = total_out_streams - 1;
938        let mut bind_pairs = Vec::with_capacity(num_bind_pairs);
939        for _ in 0..num_bind_pairs {
940            let bp = BindPair {
941                in_index: read_variable_u64(header)?,
942                out_index: read_variable_u64(header)?,
943            };
944            bind_pairs.push(bp);
945        }
946        block.bind_pairs = bind_pairs;
947
948        if total_in_streams < num_bind_pairs {
949            return Err(Error::other(
950                "Total input streams can't be less than the number of bind pairs",
951            ));
952        }
953        let num_packed_streams = total_in_streams - num_bind_pairs;
954        let mut packed_streams = vec![0; num_packed_streams];
955        if num_packed_streams == 1 {
956            let mut index = u64::MAX;
957            for i in 0..total_in_streams {
958                if block.find_bind_pair_for_in_stream(i as u64).is_none() {
959                    index = i as u64;
960                    break;
961                }
962            }
963            if index == u64::MAX {
964                return Err(Error::other("Couldn't find stream's bind pair index"));
965            }
966            packed_streams[0] = index;
967        } else {
968            for packed_stream in packed_streams.iter_mut() {
969                *packed_stream = read_variable_u64(header)?;
970            }
971        }
972        block.packed_streams = packed_streams;
973
974        Ok(block)
975    }
976}
977
978#[inline]
979fn read_variable_usize<R: Read>(reader: &mut R, field: &str) -> Result<usize, Error> {
980    let size = read_variable_u64(reader)?;
981    assert_usize(size, field)
982}
983
984#[inline]
985fn assert_usize(size: u64, field: &str) -> Result<usize, Error> {
986    if size > usize::MAX as u64 {
987        return Err(Error::other(format!("Cannot handle {field} {size}")));
988    }
989    Ok(size as usize)
990}
991
992fn read_variable_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
993    let first = reader.read_u8()? as u64;
994    let mut mask = 0x80_u64;
995    let mut value = 0;
996    for i in 0..8 {
997        if (first & mask) == 0 {
998            return Ok(value | ((first & (mask - 1)) << (8 * i)));
999        }
1000        let b = reader.read_u8()? as u64;
1001        value |= b << (8 * i);
1002        mask >>= 1;
1003    }
1004    Ok(value)
1005}
1006
1007fn read_all_or_bits<R: Read>(header: &mut R, size: usize) -> io::Result<BitSet> {
1008    let all = header.read_u8()?;
1009    if all != 0 {
1010        let mut bits = BitSet::with_capacity(size);
1011        for i in 0..size {
1012            bits.insert(i);
1013        }
1014        Ok(bits)
1015    } else {
1016        read_bits(header, size)
1017    }
1018}
1019
1020fn read_bits<R: Read>(header: &mut R, size: usize) -> io::Result<BitSet> {
1021    let mut bits = BitSet::with_capacity(size);
1022    let mut mask = 0u32;
1023    let mut cache = 0u32;
1024    for i in 0..size {
1025        if mask == 0 {
1026            mask = 0x80;
1027            cache = header.read_u8()? as u32;
1028        }
1029        if (cache & mask) != 0 {
1030            bits.insert(i);
1031        }
1032        mask >>= 1;
1033    }
1034    Ok(bits)
1035}
1036
/// Iterator state for reading zero-terminated UTF-16 (little-endian) names from a reader.
struct NamesReader<'a, R: Read> {
    /// Number of bytes that may be consumed from `reader` in total.
    max_bytes: usize,
    /// Number of bytes accounted for so far; advanced by two for every code unit read.
    read_bytes: usize,
    /// Scratch buffer holding the UTF-16 code units of the name currently being read.
    cache: Vec<u16>,
    /// Source the names are read from.
    reader: &'a mut R,
}
1043
1044impl<'a, R: Read> NamesReader<'a, R> {
1045    fn new(reader: &'a mut R, max_bytes: usize) -> Self {
1046        Self {
1047            max_bytes,
1048            reader,
1049            read_bytes: 0,
1050            cache: Vec::with_capacity(16),
1051        }
1052    }
1053}
1054
1055impl<R: Read> Iterator for NamesReader<'_, R> {
1056    type Item = Result<String, Error>;
1057
1058    fn next(&mut self) -> Option<Self::Item> {
1059        if self.max_bytes <= self.read_bytes {
1060            return None;
1061        }
1062        self.cache.clear();
1063        let mut buf = [0; 2];
1064        while self.read_bytes < self.max_bytes {
1065            let r = self.reader.read_exact(&mut buf);
1066            self.read_bytes += 2;
1067            if let Err(e) = r {
1068                return Some(Err(e.into()));
1069            }
1070            let u = u16::from_le_bytes(buf);
1071            if u == 0 {
1072                break;
1073            }
1074            self.cache.push(u);
1075        }
1076
1077        Some(String::from_utf16(&self.cache).map_err(|e| Error::other(e.to_string())))
1078    }
1079}
1080
/// Entry of the name lookup table kept by [`ArchiveReader`].
#[derive(Copy, Clone)]
struct IndexEntry {
    /// Index of the block associated with the file, copied from
    /// `stream_map.file_block_index`; `None` when the file has no associated block.
    block_index: Option<usize>,
    /// Position of the file in the archive's file list.
    file_index: usize,
}
1086
/// Reads a 7z archive file.
pub struct ArchiveReader<R: Read + Seek> {
    /// Reader providing the raw archive data.
    source: R,
    /// Parsed archive structure (files, blocks, stream map).
    archive: Archive,
    /// Password handed to the decoders.
    password: Password,
    /// Thread count handed to the decoders; kept within `1..=256` by `set_thread_count`.
    thread_count: u32,
    /// Lookup table from file name to its file and block index, built by `fill_index`.
    index: HashMap<String, IndexEntry>,
}
1095
1096#[cfg(not(target_arch = "wasm32"))]
1097impl ArchiveReader<File> {
1098    /// Opens a 7z archive file at the given `path` and creates a [`ArchiveReader`] to read it.
1099    #[inline]
1100    pub fn open(path: impl AsRef<std::path::Path>, password: Password) -> Result<Self, Error> {
1101        let file = File::open(path.as_ref())
1102            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
1103        Self::new(file, password)
1104    }
1105}
1106
1107impl<R: Read + Seek> ArchiveReader<R> {
1108    /// Creates a [`ArchiveReader`] to read a 7z archive file from the given `source` reader.
1109    #[inline]
1110    pub fn new(mut source: R, password: Password) -> Result<Self, Error> {
1111        let archive = Archive::read(&mut source, &password)?;
1112
1113        let mut reader = Self {
1114            source,
1115            archive,
1116            password,
1117            thread_count: 1,
1118            index: HashMap::default(),
1119        };
1120
1121        reader.fill_index();
1122
1123        let thread_count =
1124            std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(1).unwrap());
1125        reader.set_thread_count(thread_count.get() as u32);
1126
1127        Ok(reader)
1128    }
1129
1130    /// Creates an [`ArchiveReader`] from an existing [`Archive`] instance.
1131    ///
1132    /// This is useful when you already have a parsed archive and want to create a reader
1133    /// without re-parsing the archive structure.
1134    ///
1135    /// # Arguments
1136    /// * `archive` - An existing parsed archive instance
1137    /// * `source` - The reader providing access to the archive data
1138    /// * `password` - Password for encrypted archives
1139    #[inline]
1140    pub fn from_archive(archive: Archive, source: R, password: Password) -> Self {
1141        let mut reader = Self {
1142            source,
1143            archive,
1144            password,
1145            thread_count: 1,
1146            index: HashMap::default(),
1147        };
1148
1149        reader.fill_index();
1150
1151        let thread_count =
1152            std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(1).unwrap());
1153        reader.set_thread_count(thread_count.get() as u32);
1154
1155        reader
1156    }
1157
1158    /// Sets the thread count to use when multi-threading is supported by the de-compression
1159    /// (currently only LZMA2 if encoded with MT support).
1160    ///
1161    /// Defaults to `std::thread::available_parallelism()` if not set manually.
1162    pub fn set_thread_count(&mut self, thread_count: u32) {
1163        self.thread_count = thread_count.clamp(1, 256);
1164    }
1165
1166    fn fill_index(&mut self) {
1167        for (file_index, file) in self.archive.files.iter().enumerate() {
1168            let block_index = self.archive.stream_map.file_block_index[file_index];
1169
1170            self.index.insert(
1171                file.name.clone(),
1172                IndexEntry {
1173                    block_index,
1174                    file_index,
1175                },
1176            );
1177        }
1178    }
1179
1180    /// Returns a reference to the underlying [`Archive`] structure.
1181    ///
1182    /// This provides access to the archive metadata including files, blocks,
1183    /// and compression information.
1184    #[inline]
1185    pub fn archive(&self) -> &Archive {
1186        &self.archive
1187    }
1188
1189    fn build_decode_stack<'r>(
1190        source: &'r mut R,
1191        archive: &Archive,
1192        block_index: usize,
1193        password: &Password,
1194        thread_count: u32,
1195    ) -> Result<(Box<dyn Read + 'r>, usize), Error> {
1196        let block = &archive.blocks[block_index];
1197        if block.total_input_streams > block.total_output_streams {
1198            return Self::build_decode_stack2(source, archive, block_index, password, thread_count);
1199        }
1200        let first_pack_stream_index = archive.stream_map.block_first_pack_stream_index[block_index];
1201        let block_offset = SIGNATURE_HEADER_SIZE
1202            + archive.pack_pos
1203            + archive.stream_map.pack_stream_offsets[first_pack_stream_index];
1204
1205        let (mut has_crc, mut crc) = (block.has_crc, block.crc);
1206
1207        // Single stream blocks might have it's CRC stored in the single substream information.
1208        if !has_crc && block.num_unpack_sub_streams == 1 {
1209            if let Some(sub_streams_info) = archive.sub_streams_info.as_ref() {
1210                let mut substream_index = 0;
1211                for i in 0..block_index {
1212                    substream_index += archive.blocks[i].num_unpack_sub_streams;
1213                }
1214
1215                // Only when there is a single stream, we can use it's CRC to verify the compressed block data.
1216                // Multiple streams would contain the CRC of the compressed data for each file in the block.
1217                if sub_streams_info.has_crc.contains(substream_index) {
1218                    has_crc = true;
1219                    crc = sub_streams_info.crcs[substream_index];
1220                }
1221            }
1222        }
1223
1224        source.seek(SeekFrom::Start(block_offset))?;
1225        let pack_size = archive.pack_sizes[first_pack_stream_index] as usize;
1226
1227        let mut decoder: Box<dyn Read> = Box::new(BoundedReader::new(source, pack_size));
1228        let block = &archive.blocks[block_index];
1229        for (index, coder) in block.ordered_coder_iter() {
1230            if coder.num_in_streams != 1 || coder.num_out_streams != 1 {
1231                return Err(Error::unsupported(
1232                    "Multi input/output stream coders are not supported",
1233                ));
1234            }
1235            let next = add_decoder(
1236                decoder,
1237                block.get_unpack_size_at_index(index) as usize,
1238                coder,
1239                password,
1240                MAX_MEM_LIMIT_KB,
1241                thread_count,
1242            )?;
1243            decoder = Box::new(next);
1244        }
1245        if has_crc {
1246            decoder = Box::new(Crc32VerifyingReader::new(
1247                decoder,
1248                block.get_unpack_size() as usize,
1249                crc,
1250            ));
1251        }
1252
1253        Ok((decoder, pack_size))
1254    }
1255
1256    fn build_decode_stack2<'r>(
1257        source: &'r mut R,
1258        archive: &Archive,
1259        block_index: usize,
1260        password: &Password,
1261        thread_count: u32,
1262    ) -> Result<(Box<dyn Read + 'r>, usize), Error> {
1263        const MAX_CODER_COUNT: usize = 32;
1264        let block = &archive.blocks[block_index];
1265        if block.coders.len() > MAX_CODER_COUNT {
1266            return Err(Error::unsupported(format!(
1267                "Too many coders: {}",
1268                block.coders.len()
1269            )));
1270        }
1271
1272        assert!(block.total_input_streams > block.total_output_streams);
1273        let shared_source = Rc::new(RefCell::new(source));
1274        let first_pack_stream_index = archive.stream_map.block_first_pack_stream_index[block_index];
1275        let start_pos = SIGNATURE_HEADER_SIZE + archive.pack_pos;
1276        let offsets = &archive.stream_map.pack_stream_offsets[first_pack_stream_index..];
1277
1278        let mut sources = Vec::with_capacity(block.packed_streams.len());
1279
1280        for (i, offset) in offsets[..block.packed_streams.len()].iter().enumerate() {
1281            let pack_pos = start_pos + offset;
1282            let pack_size = archive.pack_sizes[first_pack_stream_index + i];
1283
1284            let pack_reader = SharedBoundedReader::new(
1285                Rc::clone(&shared_source),
1286                (pack_pos, pack_pos + pack_size),
1287            );
1288
1289            sources.push(pack_reader);
1290        }
1291
1292        let mut coder_to_stream_map = [usize::MAX; MAX_CODER_COUNT];
1293
1294        let mut si = 0;
1295        for (i, coder) in block.coders.iter().enumerate() {
1296            coder_to_stream_map[i] = si;
1297            si += coder.num_in_streams as usize;
1298        }
1299
1300        let main_coder_index = {
1301            let mut coder_used = [false; MAX_CODER_COUNT];
1302            for bp in block.bind_pairs.iter() {
1303                coder_used[bp.out_index as usize] = true;
1304            }
1305            let mut mci = 0;
1306            for (i, used) in coder_used[..block.coders.len()].iter().enumerate() {
1307                if !used {
1308                    mci = i;
1309                    break;
1310                }
1311            }
1312            mci
1313        };
1314
1315        let id = block.coders[main_coder_index].encoder_method_id();
1316        if id != EncoderMethod::ID_BCJ2 {
1317            return Err(Error::unsupported(format!("Unsupported method: {id:?}")));
1318        }
1319
1320        let num_in_streams = block.coders[main_coder_index].num_in_streams as usize;
1321        let mut inputs: Vec<Box<dyn Read>> = Vec::with_capacity(num_in_streams);
1322        let start_i = coder_to_stream_map[main_coder_index];
1323        for i in start_i..num_in_streams + start_i {
1324            inputs.push(Self::get_in_stream(
1325                block,
1326                &sources,
1327                &coder_to_stream_map,
1328                password,
1329                i,
1330                thread_count,
1331            )?);
1332        }
1333        let mut decoder: Box<dyn Read> = Box::new(Bcj2Reader::new(inputs, block.get_unpack_size()));
1334        if block.has_crc {
1335            decoder = Box::new(Crc32VerifyingReader::new(
1336                decoder,
1337                block.get_unpack_size() as usize,
1338                block.crc,
1339            ));
1340        }
1341        Ok((
1342            decoder,
1343            archive.pack_sizes[first_pack_stream_index] as usize,
1344        ))
1345    }
1346
1347    fn get_in_stream<'r>(
1348        block: &Block,
1349        sources: &[SharedBoundedReader<'r, R>],
1350        coder_to_stream_map: &[usize],
1351        password: &Password,
1352        in_stream_index: usize,
1353        thread_count: u32,
1354    ) -> Result<Box<dyn Read + 'r>, Error>
1355    where
1356        R: 'r,
1357    {
1358        let index = block
1359            .packed_streams
1360            .iter()
1361            .position(|&i| i == in_stream_index as u64);
1362        if let Some(index) = index {
1363            return Ok(Box::new(sources[index].clone()));
1364        }
1365
1366        let bp = block
1367            .find_bind_pair_for_in_stream(in_stream_index as u64)
1368            .ok_or_else(|| {
1369                Error::other(format!(
1370                    "Couldn't find bind pair for stream {in_stream_index}"
1371                ))
1372            })?;
1373        let index = bp.out_index as usize;
1374
1375        Self::get_in_stream2(
1376            block,
1377            sources,
1378            coder_to_stream_map,
1379            password,
1380            index,
1381            thread_count,
1382        )
1383    }
1384
1385    fn get_in_stream2<'r>(
1386        block: &Block,
1387        sources: &[SharedBoundedReader<'r, R>],
1388        coder_to_stream_map: &[usize],
1389        password: &Password,
1390        in_stream_index: usize,
1391        thread_count: u32,
1392    ) -> Result<Box<dyn Read + 'r>, Error>
1393    where
1394        R: 'r,
1395    {
1396        let coder = &block.coders[in_stream_index];
1397        let start_index = coder_to_stream_map[in_stream_index];
1398        if start_index == usize::MAX {
1399            return Err(Error::other("in_stream_index out of range"));
1400        }
1401        let uncompressed_len = block.unpack_sizes[in_stream_index] as usize;
1402        if coder.num_in_streams == 1 {
1403            let input = Self::get_in_stream(
1404                block,
1405                sources,
1406                coder_to_stream_map,
1407                password,
1408                start_index,
1409                thread_count,
1410            )?;
1411
1412            let decoder = add_decoder(
1413                input,
1414                uncompressed_len,
1415                coder,
1416                password,
1417                MAX_MEM_LIMIT_KB,
1418                thread_count,
1419            )?;
1420            return Ok(Box::new(decoder));
1421        }
1422        Err(Error::unsupported(
1423            "Multi input stream coders are not yet supported",
1424        ))
1425    }
1426
1427    /// Takes a closure to decode each files in the archive.
1428    ///
1429    /// Attention about solid archive:
1430    /// When decoding a solid archive, the data to be decompressed depends on the data in front of it,
1431    /// you cannot simply skip the previous data and only decompress the data in the back.
1432    pub fn for_each_entries<F: FnMut(&ArchiveEntry, &mut dyn Read) -> Result<bool, Error>>(
1433        &mut self,
1434        mut each: F,
1435    ) -> Result<(), Error> {
1436        let block_count = self.archive.blocks.len();
1437        for block_index in 0..block_count {
1438            let forder_dec = BlockDecoder::new(
1439                self.thread_count,
1440                block_index,
1441                &self.archive,
1442                &self.password,
1443                &mut self.source,
1444            );
1445            forder_dec.for_each_entries(&mut each)?;
1446        }
1447        // decode empty files
1448        for file_index in 0..self.archive.files.len() {
1449            let block_index = self.archive.stream_map.file_block_index[file_index];
1450            if block_index.is_none() {
1451                let file = &self.archive.files[file_index];
1452                let empty_reader: &mut dyn Read = &mut ([0u8; 0].as_slice());
1453                if !each(file, empty_reader)? {
1454                    return Ok(());
1455                }
1456            }
1457        }
1458        Ok(())
1459    }
1460
1461    /// Returns the data of a file with the given path inside the archive.
1462    ///
1463    /// # Notice
1464    /// This function is very inefficient when used with solid archives, since
1465    /// it needs to decode all data before the actual file.
1466    pub fn read_file(&mut self, name: &str) -> Result<Vec<u8>, Error> {
1467        let index_entry = *self.index.get(name).ok_or(Error::FileNotFound)?;
1468        let file = &self.archive.files[index_entry.file_index];
1469
1470        if !file.has_stream {
1471            return Ok(Vec::new());
1472        }
1473
1474        let block_index = index_entry
1475            .block_index
1476            .ok_or_else(|| Error::other("File has no associated block"))?;
1477
1478        match self.archive.is_solid {
1479            true => {
1480                let mut result = None;
1481                let target_file_ptr = file as *const _;
1482
1483                BlockDecoder::new(
1484                    self.thread_count,
1485                    block_index,
1486                    &self.archive,
1487                    &self.password,
1488                    &mut self.source,
1489                )
1490                .for_each_entries(&mut |archive_entry, reader| {
1491                    let mut data = Vec::with_capacity(archive_entry.size as usize);
1492                    reader.read_to_end(&mut data)?;
1493
1494                    if std::ptr::eq(archive_entry, target_file_ptr) {
1495                        result = Some(data);
1496                        Ok(false)
1497                    } else {
1498                        Ok(true)
1499                    }
1500                })?;
1501
1502                result.ok_or(Error::FileNotFound)
1503            }
1504            false => {
1505                let pack_index = self.archive.stream_map.block_first_pack_stream_index[block_index];
1506                let pack_offset = self.archive.stream_map.pack_stream_offsets[pack_index];
1507                let block_offset = SIGNATURE_HEADER_SIZE + self.archive.pack_pos + pack_offset;
1508
1509                self.source.seek(SeekFrom::Start(block_offset))?;
1510
1511                let (mut block_reader, _size) = Self::build_decode_stack(
1512                    &mut self.source,
1513                    &self.archive,
1514                    block_index,
1515                    &self.password,
1516                    self.thread_count,
1517                )?;
1518
1519                let mut data = Vec::with_capacity(file.size as usize);
1520                let mut decoder: Box<dyn Read> =
1521                    Box::new(BoundedReader::new(&mut block_reader, file.size as usize));
1522
1523                if file.has_crc {
1524                    decoder = Box::new(Crc32VerifyingReader::new(
1525                        decoder,
1526                        file.size as usize,
1527                        file.crc,
1528                    ));
1529                }
1530
1531                decoder.read_to_end(&mut data)?;
1532
1533                Ok(data)
1534            }
1535        }
1536    }
1537
1538    /// Get the compression method(s) used for a specific file in the archive.
1539    pub fn file_compression_methods(
1540        &self,
1541        file_name: &str,
1542        methods: &mut Vec<EncoderMethod>,
1543    ) -> Result<(), Error> {
1544        let index_entry = self.index.get(file_name).ok_or(Error::FileNotFound)?;
1545        let file = &self.archive.files[index_entry.file_index];
1546
1547        if !file.has_stream {
1548            return Ok(());
1549        }
1550
1551        let block_index = index_entry
1552            .block_index
1553            .ok_or_else(|| Error::other("File has no associated block"))?;
1554
1555        let block = self
1556            .archive
1557            .blocks
1558            .get(block_index)
1559            .ok_or_else(|| Error::other("Block not found"))?;
1560
1561        block
1562            .coders
1563            .iter()
1564            .filter_map(|coder| EncoderMethod::by_id(coder.encoder_method_id()))
1565            .for_each(|method| {
1566                methods.push(method);
1567            });
1568
1569        Ok(())
1570    }
1571}
1572
/// Decoder for a specific block within a 7z archive.
///
/// Provides access to entries within a single compression block and allows
/// decoding files from that block.
pub struct BlockDecoder<'a, R: Read + Seek> {
    /// Thread count handed to the decoders; kept within `1..=256` by `set_thread_count`.
    thread_count: u32,
    /// Index of the block to decode within `archive.blocks`.
    block_index: usize,
    /// Archive that contains the block.
    archive: &'a Archive,
    /// Password handed to the decoders.
    password: &'a Password,
    /// Reader providing the raw archive data.
    source: &'a mut R,
}
1584
1585impl<'a, R: Read + Seek> BlockDecoder<'a, R> {
1586    /// Creates a new [`BlockDecoder`] for decoding a specific block in the archive.
1587    ///
1588    /// # Arguments
1589    /// * `thread_count` - Number of threads to use for multi-threaded decompression (if supported
1590    ///   by the codec)
1591    /// * `block_index` - Index of the block to decode within the archive
1592    /// * `archive` - Reference to the archive containing the block
1593    /// * `password` - Password for encrypted blocks
1594    /// * `source` - Mutable reference to the reader providing archive data
1595    pub fn new(
1596        thread_count: u32,
1597        block_index: usize,
1598        archive: &'a Archive,
1599        password: &'a Password,
1600        source: &'a mut R,
1601    ) -> Self {
1602        Self {
1603            thread_count,
1604            block_index,
1605            archive,
1606            password,
1607            source,
1608        }
1609    }
1610
1611    /// Sets the thread count to use when multi-threading is supported by the de-compression
1612    /// (currently only LZMA2 if encoded with MT support).
1613    pub fn set_thread_count(&mut self, thread_count: u32) {
1614        self.thread_count = thread_count.clamp(1, 256);
1615    }
1616
1617    /// Returns a slice of archive entries contained in this block.
1618    ///
1619    /// The entries are returned in the order they appear in the block.
1620    pub fn entries(&self) -> &[ArchiveEntry] {
1621        let start = self.archive.stream_map.block_first_file_index[self.block_index];
1622        let file_count = self.archive.blocks[self.block_index].num_unpack_sub_streams;
1623        &self.archive.files[start..(file_count + start)]
1624    }
1625
1626    /// Returns the number of entries contained in this block.
1627    pub fn entry_count(&self) -> usize {
1628        self.archive.blocks[self.block_index].num_unpack_sub_streams
1629    }
1630
1631    /// Takes a closure to decode each files in this block.
1632    ///
1633    /// When decoding files in a block, the data to be decompressed depends on the data in front of
1634    /// it, you cannot simply skip the previous data and only decompress the data in the back.
1635    ///
1636    /// Non-solid archives use one block per file and allow more effective decoding of single files.
1637    pub fn for_each_entries<F: FnMut(&ArchiveEntry, &mut dyn Read) -> Result<bool, Error>>(
1638        self,
1639        each: &mut F,
1640    ) -> Result<bool, Error> {
1641        let Self {
1642            thread_count,
1643            block_index,
1644            archive,
1645            password,
1646            source,
1647        } = self;
1648        let (mut block_reader, _size) = ArchiveReader::build_decode_stack(
1649            source,
1650            archive,
1651            block_index,
1652            password,
1653            thread_count,
1654        )?;
1655        let start = archive.stream_map.block_first_file_index[block_index];
1656        let file_count = archive.blocks[block_index].num_unpack_sub_streams;
1657
1658        for file_index in start..(file_count + start) {
1659            let file = &archive.files[file_index];
1660            if file.has_stream && file.size > 0 {
1661                let mut decoder: Box<dyn Read> =
1662                    Box::new(BoundedReader::new(&mut block_reader, file.size as usize));
1663                if file.has_crc {
1664                    decoder = Box::new(Crc32VerifyingReader::new(
1665                        decoder,
1666                        file.size as usize,
1667                        file.crc,
1668                    ));
1669                }
1670                if !each(file, &mut decoder)
1671                    .map_err(|e| e.maybe_bad_password(!self.password.is_empty()))?
1672                {
1673                    return Ok(false);
1674                }
1675            } else {
1676                let empty_reader: &mut dyn Read = &mut ([0u8; 0].as_slice());
1677                if !each(file, empty_reader)? {
1678                    return Ok(false);
1679                }
1680            }
1681        }
1682        Ok(true)
1683    }
1684}