sevenz_rust2/
archive.rs

1#[cfg(feature = "compress")]
2use crate::encoder_options::EncoderOptions;
3use crate::{NtTime, bitset::BitSet, block::*};
4
/// Length of the 7z signature header, in bytes (always 32).
///
/// Offsets that are stored relative to the end of the signature header need
/// this value added to become absolute byte positions within the archive.
pub const SIGNATURE_HEADER_SIZE: u64 = 32;
/// The six signature bytes: ASCII `7z` followed by `BC AF 27 1C`.
pub(crate) const SEVEN_Z_SIGNATURE: &[u8] = b"7z\xBC\xAF\x27\x1C";
9
// One-byte identifier constants, numbered consecutively from 0x00 to 0x19.
// NOTE(review): the names mirror the `k*` property IDs of the 7z format
// description — confirm the values against that specification.

// Structural markers.
pub(crate) const K_END: u8 = 0x00;
pub(crate) const K_HEADER: u8 = 0x01;
pub(crate) const K_ARCHIVE_PROPERTIES: u8 = 0x02;
pub(crate) const K_ADDITIONAL_STREAMS_INFO: u8 = 0x03;
pub(crate) const K_MAIN_STREAMS_INFO: u8 = 0x04;
pub(crate) const K_FILES_INFO: u8 = 0x05;

// Stream and folder description.
pub(crate) const K_PACK_INFO: u8 = 0x06;
pub(crate) const K_UNPACK_INFO: u8 = 0x07;
pub(crate) const K_SUB_STREAMS_INFO: u8 = 0x08;
pub(crate) const K_SIZE: u8 = 0x09;
pub(crate) const K_CRC: u8 = 0x0A;
pub(crate) const K_FOLDER: u8 = 0x0B;
pub(crate) const K_CODERS_UNPACK_SIZE: u8 = 0x0C;
pub(crate) const K_NUM_UNPACK_STREAM: u8 = 0x0D;

// Per-file properties.
pub(crate) const K_EMPTY_STREAM: u8 = 0x0E;
pub(crate) const K_EMPTY_FILE: u8 = 0x0F;
pub(crate) const K_ANTI: u8 = 0x10;
pub(crate) const K_NAME: u8 = 0x11;
pub(crate) const K_C_TIME: u8 = 0x12;
pub(crate) const K_A_TIME: u8 = 0x13;
pub(crate) const K_M_TIME: u8 = 0x14;
pub(crate) const K_WIN_ATTRIBUTES: u8 = 0x15;

// TODO: Implement reading & writing comments
/// Identifier for comments; currently unused (see the TODO above).
#[allow(unused)] // nothing references it until comment support is implemented
pub(crate) const K_COMMENT: u8 = 0x16;
pub(crate) const K_ENCODED_HEADER: u8 = 0x17;
pub(crate) const K_START_POS: u8 = 0x18;
pub(crate) const K_DUMMY: u8 = 0x19;
39
40/// Represents a parsed 7z archive structure.
41///
42/// Contains metadata about the archive including files, compression blocks,
43/// and internal structure information necessary for decompression.
44#[derive(Debug, Default, Clone)]
45pub struct Archive {
46    /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
47    pub(crate) pack_pos: u64,
48    pub(crate) pack_sizes: Vec<u64>,
49    pub(crate) pack_crcs_defined: BitSet,
50    pub(crate) pack_crcs: Vec<u64>,
51    pub(crate) sub_streams_info: Option<SubStreamsInfo>,
52    /// Compression blocks in the archive.
53    pub blocks: Vec<Block>,
54    /// File and directory entries in the archive.
55    pub files: Vec<ArchiveEntry>,
56    /// Mapping between files, blocks, and pack streams.
57    pub stream_map: StreamMap,
58    /// Whether this is a solid archive (better compression, slower random access).
59    pub is_solid: bool,
60}
61
62impl Archive {
63    /// Returns the offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
64    /// Used for calculating byte offsets when streaming uncompressed (COPY) content.
65    pub fn pack_pos(&self) -> u64 {
66        self.pack_pos
67    }
68
69    /// Returns the sizes of each packed stream in bytes.
70    /// Used for calculating byte ranges when streaming.
71    pub fn pack_sizes(&self) -> &[u64] {
72        &self.pack_sizes
73    }
74}
75
76#[derive(Debug, Default, Clone)]
77pub(crate) struct SubStreamsInfo {
78    pub(crate) unpack_sizes: Vec<u64>,
79    pub(crate) has_crc: BitSet,
80    pub(crate) crcs: Vec<u64>,
81}
82
83/// Represents a single file or directory entry within a 7z archive.
84///
85/// Contains metadata about the entry including name, timestamps, attributes,
86/// and size information.
87#[derive(Debug, Default, Clone)]
88pub struct ArchiveEntry {
89    /// Name/path of the entry within the archive.
90    pub name: String,
91    /// Whether this entry has associated data stream.
92    pub has_stream: bool,
93    /// Whether this entry is a directory.
94    pub is_directory: bool,
95    /// Whether this is an anti-item (used for deletion in updates).
96    pub is_anti_item: bool,
97    /// Whether creation date is present.
98    pub has_creation_date: bool,
99    /// Whether last modified date is present.
100    pub has_last_modified_date: bool,
101    /// Whether access date is present.
102    pub has_access_date: bool,
103    /// Creation date and time.
104    pub creation_date: NtTime,
105    /// Last modified date and time.
106    pub last_modified_date: NtTime,
107    /// Last access date and time.
108    pub access_date: NtTime,
109    /// Whether Windows file attributes are present.
110    pub has_windows_attributes: bool,
111    /// Windows file attributes.
112    pub windows_attributes: u32,
113    /// Whether CRC is present.
114    pub has_crc: bool,
115    /// CRC32 checksum of uncompressed data.
116    pub crc: u64,
117    /// CRC32 checksum of compressed data.
118    pub compressed_crc: u64,
119    /// Uncompressed size in bytes.
120    pub size: u64,
121    /// Compressed size in bytes.
122    pub compressed_size: u64,
123}
124
125impl ArchiveEntry {
126    /// Creates a new default archive entry.
127    pub fn new() -> Self {
128        Self::default()
129    }
130
131    /// Creates a new archive entry representing a file.
132    ///
133    /// # Arguments
134    /// * `entry_name` - The name/path of the file within the archive
135    pub fn new_file(entry_name: &str) -> Self {
136        Self {
137            name: entry_name.to_string(),
138            has_stream: true,
139            is_directory: false,
140            ..Default::default()
141        }
142    }
143
144    /// Creates a new archive entry representing a directory.
145    ///
146    /// # Arguments
147    /// * `entry_name` - The name/path of the directory within the archive
148    pub fn new_directory(entry_name: &str) -> Self {
149        Self {
150            name: entry_name.to_string(),
151            has_stream: false,
152            is_directory: true,
153            ..Default::default()
154        }
155    }
156
157    /// Creates a new archive entry from a filesystem path.
158    ///
159    /// Automatically extracts metadata like timestamps and attributes from the filesystem.
160    /// On Windows, backslashes in the entry name are converted to forward slashes.
161    ///
162    /// # Arguments
163    /// * `path` - The filesystem path to extract metadata from
164    /// * `entry_name` - The name/path to use for this entry within the archive
165    pub fn from_path(path: impl AsRef<std::path::Path>, entry_name: String) -> Self {
166        let path = path.as_ref();
167        #[cfg(target_os = "windows")]
168        let entry_name = {
169            let mut name_bytes = entry_name.into_bytes();
170            for b in &mut name_bytes {
171                if *b == b'\\' {
172                    *b = b'/';
173                }
174            }
175            String::from_utf8(name_bytes).unwrap()
176        };
177        let mut entry = ArchiveEntry {
178            name: entry_name,
179            has_stream: path.is_file(),
180            is_directory: path.is_dir(),
181            ..Default::default()
182        };
183
184        if let Ok(meta) = path.metadata() {
185            if let Ok(modified) = meta.modified() {
186                if let Ok(date) = NtTime::try_from(modified) {
187                    entry.last_modified_date = date;
188                    entry.has_last_modified_date = entry.last_modified_date.0 > 0;
189                }
190            }
191            if let Ok(date) = meta.created() {
192                if let Ok(date) = NtTime::try_from(date) {
193                    entry.creation_date = date;
194                    entry.has_creation_date = entry.creation_date.0 > 0;
195                }
196            }
197            if let Ok(date) = meta.accessed() {
198                if let Ok(date) = NtTime::try_from(date) {
199                    entry.access_date = date;
200                    entry.has_access_date = entry.access_date.0 > 0;
201                }
202            }
203        }
204        entry
205    }
206
207    /// Returns the name/path of this entry within the archive.
208    pub fn name(&self) -> &str {
209        self.name.as_ref()
210    }
211
212    /// Returns whether this entry is a directory.
213    pub fn is_directory(&self) -> bool {
214        self.is_directory
215    }
216
217    /// Returns whether this entry has an associated data stream.
218    pub fn has_stream(&self) -> bool {
219        self.has_stream
220    }
221
222    /// Returns the creation date of this entry.
223    pub fn creation_date(&self) -> NtTime {
224        self.creation_date
225    }
226
227    /// Returns the last modified date of this entry.
228    pub fn last_modified_date(&self) -> NtTime {
229        self.last_modified_date
230    }
231
232    /// Returns the uncompressed size of this entry in bytes.
233    pub fn size(&self) -> u64 {
234        self.size
235    }
236
237    /// Returns the Windows file attributes of this entry.
238    pub fn windows_attributes(&self) -> u32 {
239        self.windows_attributes
240    }
241
242    /// Returns the last access date of this entry.
243    pub fn access_date(&self) -> NtTime {
244        self.access_date
245    }
246
247    /// Returns whether this entry is an anti-item (used for deletion in updates).
248    pub fn is_anti_item(&self) -> bool {
249        self.is_anti_item
250    }
251}
252
/// Configuration for encoding methods when compressing data.
///
/// Combines an encoder method with optional encoder-specific options.
/// `Clone` is derived: it copies `method` and clones `options`, exactly what a
/// field-by-field implementation would do.
#[cfg(feature = "compress")]
#[derive(Debug, Default, Clone)]
pub struct EncoderConfiguration {
    /// The encoder method to use.
    pub method: EncoderMethod,
    /// Optional encoder-specific options; `None` means no options were supplied.
    pub options: Option<EncoderOptions>,
}

/// Wraps a bare method in a configuration without options.
#[cfg(feature = "compress")]
impl From<EncoderMethod> for EncoderConfiguration {
    fn from(value: EncoderMethod) -> Self {
        Self::new(value)
    }
}

#[cfg(feature = "compress")]
impl EncoderConfiguration {
    /// Creates a new encoder configuration with the specified method and no options.
    ///
    /// # Arguments
    /// * `method` - The encoder method to use
    pub fn new(method: EncoderMethod) -> Self {
        Self {
            method,
            options: None,
        }
    }

    /// Adds encoder-specific options to this configuration, replacing any
    /// options set earlier, and returns the updated configuration.
    ///
    /// # Arguments
    /// * `options` - The encoder options to apply
    pub fn with_options(mut self, options: EncoderOptions) -> Self {
        self.options = Some(options);
        self
    }
}
304
/// A chainable encoder method (filter, compression or encryption).
///
/// Pairs a human-readable name with the method's binary ID.
///
/// NOTE(review): the derived `Default` yields an empty name and an empty ID,
/// which matches none of the methods declared below.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct EncoderMethod(&'static str, &'static [u8]);

impl EncoderMethod {
    // ---- Binary method IDs -------------------------------------------------

    /// Binary ID of COPY (no compression).
    pub const ID_COPY: &'static [u8] = &[0x00];
    /// Binary ID of the Delta filter.
    pub const ID_DELTA: &'static [u8] = &[0x03];

    /// Binary ID of LZMA compression.
    pub const ID_LZMA: &'static [u8] = &[0x03, 0x01, 0x01];
    /// Binary ID of the BCJ x86 filter.
    pub const ID_BCJ_X86: &'static [u8] = &[0x03, 0x03, 0x01, 0x03];
    /// Binary ID of the BCJ2 filter.
    pub const ID_BCJ2: &'static [u8] = &[0x03, 0x03, 0x01, 0x1B];
    /// Binary ID of the BCJ PowerPC filter.
    pub const ID_BCJ_PPC: &'static [u8] = &[0x03, 0x03, 0x02, 0x05];
    /// Binary ID of the BCJ IA64 filter.
    pub const ID_BCJ_IA64: &'static [u8] = &[0x03, 0x03, 0x04, 0x01];
    /// Binary ID of the BCJ ARM filter.
    pub const ID_BCJ_ARM: &'static [u8] = &[0x03, 0x03, 0x05, 0x01];
    /// Binary ID of the BCJ ARM64 filter.
    pub const ID_BCJ_ARM64: &'static [u8] = &[0xA];
    /// Binary ID of the BCJ ARM Thumb filter.
    pub const ID_BCJ_ARM_THUMB: &'static [u8] = &[0x03, 0x03, 0x07, 0x01];
    /// Binary ID of the BCJ SPARC filter.
    pub const ID_BCJ_SPARC: &'static [u8] = &[0x03, 0x03, 0x08, 0x05];
    /// Binary ID of the BCJ RISCV filter.
    pub const ID_BCJ_RISCV: &'static [u8] = &[0xB];
    /// Binary ID of PPMD compression.
    pub const ID_PPMD: &'static [u8] = &[0x03, 0x04, 0x01];

    /// Binary ID of LZMA2 compression.
    pub const ID_LZMA2: &'static [u8] = &[0x21];
    /// Binary ID of BZIP2 compression.
    pub const ID_BZIP2: &'static [u8] = &[0x04, 0x02, 0x02];
    /// Binary ID of Zstandard compression.
    pub const ID_ZSTD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x01];
    /// Binary ID of Brotli compression.
    pub const ID_BROTLI: &'static [u8] = &[0x04, 0xF7, 0x11, 0x02];
    /// Binary ID of LZ4 compression.
    pub const ID_LZ4: &'static [u8] = &[0x04, 0xF7, 0x11, 0x04];
    /// Binary ID of LZS compression.
    pub const ID_LZS: &'static [u8] = &[0x04, 0xF7, 0x11, 0x05];
    /// Binary ID of Lizard compression.
    pub const ID_LIZARD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x06];
    /// Binary ID of Deflate compression.
    pub const ID_DEFLATE: &'static [u8] = &[0x04, 0x01, 0x08];
    /// Binary ID of Deflate64 compression.
    pub const ID_DEFLATE64: &'static [u8] = &[0x04, 0x01, 0x09];
    /// Binary ID of AES256-SHA256 encryption.
    pub const ID_AES256_SHA256: &'static [u8] = &[0x06, 0xF1, 0x07, 0x01];

    // ---- Compression and encryption methods --------------------------------

    /// COPY: stores data without compression.
    pub const COPY: Self = Self("COPY", Self::ID_COPY);
    /// LZMA compression.
    pub const LZMA: Self = Self("LZMA", Self::ID_LZMA);
    /// LZMA2 compression.
    pub const LZMA2: Self = Self("LZMA2", Self::ID_LZMA2);
    /// PPMD compression.
    pub const PPMD: Self = Self("PPMD", Self::ID_PPMD);
    /// BZIP2 compression.
    pub const BZIP2: Self = Self("BZIP2", Self::ID_BZIP2);
    /// Zstandard compression.
    pub const ZSTD: Self = Self("ZSTD", Self::ID_ZSTD);
    /// Brotli compression.
    pub const BROTLI: Self = Self("BROTLI", Self::ID_BROTLI);
    /// LZ4 compression.
    pub const LZ4: Self = Self("LZ4", Self::ID_LZ4);
    /// LZS compression.
    pub const LZS: Self = Self("LZS", Self::ID_LZS);
    /// Lizard compression.
    pub const LIZARD: Self = Self("LIZARD", Self::ID_LIZARD);
    /// Deflate compression.
    pub const DEFLATE: Self = Self("DEFLATE", Self::ID_DEFLATE);
    /// Deflate64 compression.
    pub const DEFLATE64: Self = Self("DEFLATE64", Self::ID_DEFLATE64);
    /// AES256-SHA256 encryption.
    pub const AES256_SHA256: Self = Self("AES256_SHA256", Self::ID_AES256_SHA256);

    // ---- Filters -----------------------------------------------------------

    /// BCJ x86 filter.
    pub const BCJ_X86_FILTER: Self = Self("BCJ_X86", Self::ID_BCJ_X86);
    /// BCJ PowerPC filter.
    pub const BCJ_PPC_FILTER: Self = Self("BCJ_PPC", Self::ID_BCJ_PPC);
    /// BCJ IA64 filter.
    pub const BCJ_IA64_FILTER: Self = Self("BCJ_IA64", Self::ID_BCJ_IA64);
    /// BCJ ARM filter.
    pub const BCJ_ARM_FILTER: Self = Self("BCJ_ARM", Self::ID_BCJ_ARM);
    /// BCJ ARM64 filter.
    pub const BCJ_ARM64_FILTER: Self = Self("BCJ_ARM64", Self::ID_BCJ_ARM64);
    /// BCJ ARM Thumb filter.
    pub const BCJ_ARM_THUMB_FILTER: Self = Self("BCJ_ARM_THUMB", Self::ID_BCJ_ARM_THUMB);
    /// BCJ SPARC filter.
    pub const BCJ_SPARC_FILTER: Self = Self("BCJ_SPARC", Self::ID_BCJ_SPARC);
    /// BCJ RISC-V filter.
    pub const BCJ_RISCV_FILTER: Self = Self("BCJ_RISCV", Self::ID_BCJ_RISCV);
    /// Delta filter.
    pub const DELTA_FILTER: Self = Self("DELTA", Self::ID_DELTA);
    /// BCJ2 filter.
    pub const BCJ2_FILTER: Self = Self("BCJ2", Self::ID_BCJ2);

    /// Every method declared above; this is the table `by_id` searches.
    const ENCODING_METHODS: &'static [&'static EncoderMethod] = &[
        &Self::COPY,
        &Self::LZMA,
        &Self::LZMA2,
        &Self::PPMD,
        &Self::BZIP2,
        &Self::ZSTD,
        &Self::BROTLI,
        &Self::LZ4,
        &Self::LZS,
        &Self::LIZARD,
        &Self::DEFLATE,
        &Self::DEFLATE64,
        &Self::AES256_SHA256,
        &Self::BCJ_X86_FILTER,
        &Self::BCJ_PPC_FILTER,
        &Self::BCJ_IA64_FILTER,
        &Self::BCJ_ARM_FILTER,
        &Self::BCJ_ARM64_FILTER,
        &Self::BCJ_ARM_THUMB_FILTER,
        &Self::BCJ_SPARC_FILTER,
        &Self::BCJ_RISCV_FILTER,
        &Self::DELTA_FILTER,
        &Self::BCJ2_FILTER,
    ];

    /// Human-readable name of this method, e.g. `"LZMA2"`.
    #[inline]
    pub const fn name(&self) -> &'static str {
        let Self(label, _) = *self;
        label
    }

    /// Binary ID of this method.
    #[inline]
    pub const fn id(&self) -> &'static [u8] {
        let Self(_, method_id) = *self;
        method_id
    }

    /// Looks up a method by its binary ID.
    ///
    /// Returns `None` when `id` matches none of the known methods.
    ///
    /// # Arguments
    /// * `id` - The binary method ID to search for
    #[inline]
    pub fn by_id(id: &[u8]) -> Option<Self> {
        for candidate in Self::ENCODING_METHODS {
            if candidate.1 == id {
                return Some(**candidate);
            }
        }
        None
    }
}
458
/// Lookup tables tying together the files, blocks and pack streams of an archive.
///
/// Records which compression block each archive entry belongs to and where
/// the packed data streams of each block are located.
#[derive(Clone, Debug, Default)]
pub struct StreamMap {
    /// For each block, the index of its first pack stream.
    pub(crate) block_first_pack_stream_index: Vec<usize>,
    /// Byte offset of each pack stream within the packed data region.
    pub(crate) pack_stream_offsets: Vec<u64>,
    /// For each block, the index of its first file.
    pub block_first_file_index: Vec<usize>,
    /// For each file, the index of its block (`None` if the file has no data).
    pub file_block_index: Vec<Option<usize>>,
}

impl StreamMap {
    /// Index of the first pack stream of every block.
    ///
    /// Lets callers map a block to its packed data streams.
    pub fn block_first_pack_stream_index(&self) -> &[usize] {
        self.block_first_pack_stream_index.as_slice()
    }

    /// Byte offset of every pack stream within the packed data region.
    ///
    /// Add `pack_pos` and `SIGNATURE_HEADER_SIZE` to obtain absolute offsets.
    pub fn pack_stream_offsets(&self) -> &[u64] {
        self.pack_stream_offsets.as_slice()
    }
}
486
/// Offset, size and CRC describing the "next header".
///
/// NOTE(review): presumably decoded from the signature header at the start of
/// the archive — confirm against the reader.
#[derive(Clone, Copy, Debug)]
pub(crate) struct StartHeader {
    /// Offset of the next header.
    pub(crate) next_header_offset: u64,
    /// Size of the next header.
    pub(crate) next_header_size: u64,
    /// CRC of the next header.
    pub(crate) next_header_crc: u64,
}