sevenz_rust2/
archive.rs

1#[cfg(feature = "compress")]
2use crate::encoder_options::EncoderOptions;
3use crate::{NtTime, bitset::BitSet, block::*};
4
/// Size in bytes of the fixed signature header found at the start of an archive.
pub(crate) const SIGNATURE_HEADER_SIZE: u64 = 32;
/// Magic bytes identifying a 7z archive: `'7'`, `'z'`, `0xBC`, `0xAF`, `0x27`, `0x1C`.
pub(crate) const SEVEN_Z_SIGNATURE: &[u8] = b"7z\xBC\xAF\x27\x1C";
7
// Property identifiers used inside 7z headers. Each constant is the single
// byte tag that introduces the corresponding header section or attribute.

/// Tag: end of the current property list.
pub(crate) const K_END: u8 = 0x00;
/// Tag: plain (non-encoded) header.
pub(crate) const K_HEADER: u8 = 0x01;
/// Tag: archive properties.
pub(crate) const K_ARCHIVE_PROPERTIES: u8 = 0x02;
/// Tag: additional streams info.
pub(crate) const K_ADDITIONAL_STREAMS_INFO: u8 = 0x03;
/// Tag: main streams info.
pub(crate) const K_MAIN_STREAMS_INFO: u8 = 0x04;
/// Tag: files info.
pub(crate) const K_FILES_INFO: u8 = 0x05;
/// Tag: pack info.
pub(crate) const K_PACK_INFO: u8 = 0x06;
/// Tag: unpack info.
pub(crate) const K_UNPACK_INFO: u8 = 0x07;
/// Tag: sub-streams info.
pub(crate) const K_SUB_STREAMS_INFO: u8 = 0x08;
/// Tag: size list.
pub(crate) const K_SIZE: u8 = 0x09;
/// Tag: CRC list.
pub(crate) const K_CRC: u8 = 0x0A;
/// Tag: folder definitions.
pub(crate) const K_FOLDER: u8 = 0x0B;
/// Tag: unpack sizes of the coders.
pub(crate) const K_CODERS_UNPACK_SIZE: u8 = 0x0C;
/// Tag: number of unpack streams.
pub(crate) const K_NUM_UNPACK_STREAM: u8 = 0x0D;
/// Tag: "empty stream" flags.
pub(crate) const K_EMPTY_STREAM: u8 = 0x0E;
/// Tag: "empty file" flags.
pub(crate) const K_EMPTY_FILE: u8 = 0x0F;
/// Tag: anti-item flags.
pub(crate) const K_ANTI: u8 = 0x10;
/// Tag: entry names.
pub(crate) const K_NAME: u8 = 0x11;
/// Tag: creation times.
pub(crate) const K_C_TIME: u8 = 0x12;
/// Tag: access times.
pub(crate) const K_A_TIME: u8 = 0x13;
/// Tag: modification times.
pub(crate) const K_M_TIME: u8 = 0x14;
/// Tag: Windows attributes.
pub(crate) const K_WIN_ATTRIBUTES: u8 = 0x15;

/// Tag: comment.
///
/// TODO: Implement reading & writing comments
#[allow(unused)]
pub(crate) const K_COMMENT: u8 = 0x16;
/// Tag: encoded header.
pub(crate) const K_ENCODED_HEADER: u8 = 0x17;
/// Tag: start position.
pub(crate) const K_START_POS: u8 = 0x18;
/// Tag: dummy entry.
pub(crate) const K_DUMMY: u8 = 0x19;
37
38/// Represents a parsed 7z archive structure.
39///
40/// Contains metadata about the archive including files, compression blocks,
41/// and internal structure information necessary for decompression.
42#[derive(Debug, Default, Clone)]
43pub struct Archive {
44    /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
45    pub(crate) pack_pos: u64,
46    pub(crate) pack_sizes: Vec<u64>,
47    pub(crate) pack_crcs_defined: BitSet,
48    pub(crate) pack_crcs: Vec<u64>,
49    pub(crate) sub_streams_info: Option<SubStreamsInfo>,
50    /// Compression blocks in the archive.
51    pub blocks: Vec<Block>,
52    /// File and directory entries in the archive.
53    pub files: Vec<ArchiveEntry>,
54    /// Mapping between files, blocks, and pack streams.
55    pub stream_map: StreamMap,
56    /// Whether this is a solid archive (better compression, slower random access).
57    pub is_solid: bool,
58}
59
60#[derive(Debug, Default, Clone)]
61pub(crate) struct SubStreamsInfo {
62    pub(crate) unpack_sizes: Vec<u64>,
63    pub(crate) has_crc: BitSet,
64    pub(crate) crcs: Vec<u64>,
65}
66
67/// Represents a single file or directory entry within a 7z archive.
68///
69/// Contains metadata about the entry including name, timestamps, attributes,
70/// and size information.
71#[derive(Debug, Default, Clone)]
72pub struct ArchiveEntry {
73    /// Name/path of the entry within the archive.
74    pub name: String,
75    /// Whether this entry has associated data stream.
76    pub has_stream: bool,
77    /// Whether this entry is a directory.
78    pub is_directory: bool,
79    /// Whether this is an anti-item (used for deletion in updates).
80    pub is_anti_item: bool,
81    /// Whether creation date is present.
82    pub has_creation_date: bool,
83    /// Whether last modified date is present.
84    pub has_last_modified_date: bool,
85    /// Whether access date is present.
86    pub has_access_date: bool,
87    /// Creation date and time.
88    pub creation_date: NtTime,
89    /// Last modified date and time.
90    pub last_modified_date: NtTime,
91    /// Last access date and time.
92    pub access_date: NtTime,
93    /// Whether Windows file attributes are present.
94    pub has_windows_attributes: bool,
95    /// Windows file attributes.
96    pub windows_attributes: u32,
97    /// Whether CRC is present.
98    pub has_crc: bool,
99    /// CRC32 checksum of uncompressed data.
100    pub crc: u64,
101    /// CRC32 checksum of compressed data.
102    pub compressed_crc: u64,
103    /// Uncompressed size in bytes.
104    pub size: u64,
105    /// Compressed size in bytes.
106    pub compressed_size: u64,
107}
108
109impl ArchiveEntry {
110    /// Creates a new default archive entry.
111    pub fn new() -> Self {
112        Self::default()
113    }
114
115    /// Creates a new archive entry representing a file.
116    ///
117    /// # Arguments
118    /// * `entry_name` - The name/path of the file within the archive
119    pub fn new_file(entry_name: &str) -> Self {
120        Self {
121            name: entry_name.to_string(),
122            has_stream: true,
123            is_directory: false,
124            ..Default::default()
125        }
126    }
127
128    /// Creates a new archive entry representing a directory.
129    ///
130    /// # Arguments
131    /// * `entry_name` - The name/path of the directory within the archive
132    pub fn new_directory(entry_name: &str) -> Self {
133        Self {
134            name: entry_name.to_string(),
135            has_stream: false,
136            is_directory: true,
137            ..Default::default()
138        }
139    }
140
141    /// Creates a new archive entry from a filesystem path.
142    ///
143    /// Automatically extracts metadata like timestamps and attributes from the filesystem.
144    /// On Windows, backslashes in the entry name are converted to forward slashes.
145    ///
146    /// # Arguments
147    /// * `path` - The filesystem path to extract metadata from
148    /// * `entry_name` - The name/path to use for this entry within the archive
149    pub fn from_path(path: impl AsRef<std::path::Path>, entry_name: String) -> Self {
150        let path = path.as_ref();
151        #[cfg(target_os = "windows")]
152        let entry_name = {
153            let mut name_bytes = entry_name.into_bytes();
154            for b in &mut name_bytes {
155                if *b == b'\\' {
156                    *b = b'/';
157                }
158            }
159            String::from_utf8(name_bytes).unwrap()
160        };
161        let mut entry = ArchiveEntry {
162            name: entry_name,
163            has_stream: path.is_file(),
164            is_directory: path.is_dir(),
165            ..Default::default()
166        };
167
168        if let Ok(meta) = path.metadata() {
169            if let Ok(modified) = meta.modified() {
170                if let Ok(date) = NtTime::try_from(modified) {
171                    entry.last_modified_date = date;
172                    entry.has_last_modified_date = entry.last_modified_date.0 > 0;
173                }
174            }
175            if let Ok(date) = meta.created() {
176                if let Ok(date) = NtTime::try_from(date) {
177                    entry.creation_date = date;
178                    entry.has_creation_date = entry.creation_date.0 > 0;
179                }
180            }
181            if let Ok(date) = meta.accessed() {
182                if let Ok(date) = NtTime::try_from(date) {
183                    entry.access_date = date;
184                    entry.has_access_date = entry.access_date.0 > 0;
185                }
186            }
187        }
188        entry
189    }
190
191    /// Returns the name/path of this entry within the archive.
192    pub fn name(&self) -> &str {
193        self.name.as_ref()
194    }
195
196    /// Returns whether this entry is a directory.
197    pub fn is_directory(&self) -> bool {
198        self.is_directory
199    }
200
201    /// Returns whether this entry has an associated data stream.
202    pub fn has_stream(&self) -> bool {
203        self.has_stream
204    }
205
206    /// Returns the creation date of this entry.
207    pub fn creation_date(&self) -> NtTime {
208        self.creation_date
209    }
210
211    /// Returns the last modified date of this entry.
212    pub fn last_modified_date(&self) -> NtTime {
213        self.last_modified_date
214    }
215
216    /// Returns the uncompressed size of this entry in bytes.
217    pub fn size(&self) -> u64 {
218        self.size
219    }
220
221    /// Returns the Windows file attributes of this entry.
222    pub fn windows_attributes(&self) -> u32 {
223        self.windows_attributes
224    }
225
226    /// Returns the last access date of this entry.
227    pub fn access_date(&self) -> NtTime {
228        self.access_date
229    }
230
231    /// Returns whether this entry is an anti-item (used for deletion in updates).
232    pub fn is_anti_item(&self) -> bool {
233        self.is_anti_item
234    }
235}
236
/// Configuration for encoding methods when compressing data.
///
/// Combines an encoder method with optional encoder-specific options.
/// Cloning copies the method and clones the options.
#[cfg(feature = "compress")]
#[derive(Debug, Default, Clone)]
pub struct EncoderConfiguration {
    /// The encoder method to use.
    pub method: EncoderMethod,
    /// Optional encoder-specific options.
    pub options: Option<EncoderOptions>,
}

/// Converts a bare [`EncoderMethod`] into a configuration without options.
#[cfg(feature = "compress")]
impl From<EncoderMethod> for EncoderConfiguration {
    fn from(value: EncoderMethod) -> Self {
        Self::new(value)
    }
}
265
#[cfg(feature = "compress")]
impl EncoderConfiguration {
    /// Builds a configuration that uses `method` and carries no options.
    ///
    /// # Arguments
    /// * `method` - The encoder method to use
    pub fn new(method: EncoderMethod) -> Self {
        let options = None;
        Self { method, options }
    }

    /// Returns this configuration with its options set to `options`.
    ///
    /// Any options previously stored are replaced.
    ///
    /// # Arguments
    /// * `options` - The encoder options to apply
    pub fn with_options(self, options: EncoderOptions) -> Self {
        Self {
            method: self.method,
            options: Some(options),
        }
    }
}
288
/// A chainable encoder method: a filter, a compression codec or an encryption scheme.
///
/// Each method pairs a human-readable name with the binary method ID that is
/// stored in the archive.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EncoderMethod(&'static str, &'static [u8]);

impl EncoderMethod {
    // ---- Binary method IDs: general ----

    /// Method ID for COPY (no compression).
    pub const ID_COPY: &'static [u8] = &[0x00];
    /// Method ID for AES256-SHA256 encryption.
    pub const ID_AES256_SHA256: &'static [u8] = &[0x06, 0xF1, 0x07, 0x01];

    // ---- Binary method IDs: compression ----

    /// Method ID for LZMA compression.
    pub const ID_LZMA: &'static [u8] = &[0x03, 0x01, 0x01];
    /// Method ID for LZMA2 compression.
    pub const ID_LZMA2: &'static [u8] = &[0x21];
    /// Method ID for PPMD compression.
    pub const ID_PPMD: &'static [u8] = &[0x03, 0x04, 0x01];
    /// Method ID for BZIP2 compression.
    pub const ID_BZIP2: &'static [u8] = &[0x04, 0x02, 0x02];
    /// Method ID for Zstandard compression.
    pub const ID_ZSTD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x01];
    /// Method ID for Brotli compression.
    pub const ID_BROTLI: &'static [u8] = &[0x04, 0xF7, 0x11, 0x02];
    /// Method ID for LZ4 compression.
    pub const ID_LZ4: &'static [u8] = &[0x04, 0xF7, 0x11, 0x04];
    /// Method ID for LZS compression.
    pub const ID_LZS: &'static [u8] = &[0x04, 0xF7, 0x11, 0x05];
    /// Method ID for Lizard compression.
    pub const ID_LIZARD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x06];
    /// Method ID for Deflate compression.
    pub const ID_DEFLATE: &'static [u8] = &[0x04, 0x01, 0x08];
    /// Method ID for Deflate64 compression.
    pub const ID_DEFLATE64: &'static [u8] = &[0x04, 0x01, 0x09];

    // ---- Binary method IDs: filters ----

    /// Method ID for Delta filter.
    pub const ID_DELTA: &'static [u8] = &[0x03];
    /// Method ID for BCJ x86 filter.
    pub const ID_BCJ_X86: &'static [u8] = &[0x03, 0x03, 0x01, 0x03];
    /// Method ID for BCJ2 filter.
    pub const ID_BCJ2: &'static [u8] = &[0x03, 0x03, 0x01, 0x1B];
    /// Method ID for BCJ PowerPC filter.
    pub const ID_BCJ_PPC: &'static [u8] = &[0x03, 0x03, 0x02, 0x05];
    /// Method ID for BCJ IA64 filter.
    pub const ID_BCJ_IA64: &'static [u8] = &[0x03, 0x03, 0x04, 0x01];
    /// Method ID for BCJ ARM filter.
    pub const ID_BCJ_ARM: &'static [u8] = &[0x03, 0x03, 0x05, 0x01];
    /// Method ID for BCJ ARM64 filter.
    pub const ID_BCJ_ARM64: &'static [u8] = &[0x0A];
    /// Method ID for BCJ ARM Thumb filter.
    pub const ID_BCJ_ARM_THUMB: &'static [u8] = &[0x03, 0x03, 0x07, 0x01];
    /// Method ID for BCJ SPARC filter.
    pub const ID_BCJ_SPARC: &'static [u8] = &[0x03, 0x03, 0x08, 0x05];
    /// Method ID for BCJ RISCV filter.
    pub const ID_BCJ_RISCV: &'static [u8] = &[0x0B];

    // ---- Named methods: compression and encryption ----

    /// COPY method (no compression).
    pub const COPY: Self = Self("COPY", Self::ID_COPY);
    /// LZMA compression method.
    pub const LZMA: Self = Self("LZMA", Self::ID_LZMA);
    /// LZMA2 compression method.
    pub const LZMA2: Self = Self("LZMA2", Self::ID_LZMA2);
    /// PPMD compression method.
    pub const PPMD: Self = Self("PPMD", Self::ID_PPMD);
    /// BZIP2 compression method.
    pub const BZIP2: Self = Self("BZIP2", Self::ID_BZIP2);
    /// Zstandard compression method.
    pub const ZSTD: Self = Self("ZSTD", Self::ID_ZSTD);
    /// Brotli compression method.
    pub const BROTLI: Self = Self("BROTLI", Self::ID_BROTLI);
    /// LZ4 compression method.
    pub const LZ4: Self = Self("LZ4", Self::ID_LZ4);
    /// LZS compression method.
    pub const LZS: Self = Self("LZS", Self::ID_LZS);
    /// Lizard compression method.
    pub const LIZARD: Self = Self("LIZARD", Self::ID_LIZARD);
    /// Deflate compression method.
    pub const DEFLATE: Self = Self("DEFLATE", Self::ID_DEFLATE);
    /// Deflate64 compression method.
    pub const DEFLATE64: Self = Self("DEFLATE64", Self::ID_DEFLATE64);
    /// AES256-SHA256 encryption method.
    pub const AES256_SHA256: Self = Self("AES256_SHA256", Self::ID_AES256_SHA256);

    // ---- Named methods: filters ----

    /// BCJ x86 filter method.
    pub const BCJ_X86_FILTER: Self = Self("BCJ_X86", Self::ID_BCJ_X86);
    /// BCJ PowerPC filter method.
    pub const BCJ_PPC_FILTER: Self = Self("BCJ_PPC", Self::ID_BCJ_PPC);
    /// BCJ IA64 filter method.
    pub const BCJ_IA64_FILTER: Self = Self("BCJ_IA64", Self::ID_BCJ_IA64);
    /// BCJ ARM filter method.
    pub const BCJ_ARM_FILTER: Self = Self("BCJ_ARM", Self::ID_BCJ_ARM);
    /// BCJ ARM64 filter method.
    pub const BCJ_ARM64_FILTER: Self = Self("BCJ_ARM64", Self::ID_BCJ_ARM64);
    /// BCJ ARM Thumb filter method.
    pub const BCJ_ARM_THUMB_FILTER: Self = Self("BCJ_ARM_THUMB", Self::ID_BCJ_ARM_THUMB);
    /// BCJ SPARC filter method.
    pub const BCJ_SPARC_FILTER: Self = Self("BCJ_SPARC", Self::ID_BCJ_SPARC);
    /// BCJ RISC-V filter method.
    pub const BCJ_RISCV_FILTER: Self = Self("BCJ_RISCV", Self::ID_BCJ_RISCV);
    /// Delta filter method.
    pub const DELTA_FILTER: Self = Self("DELTA", Self::ID_DELTA);
    /// BCJ2 filter method.
    pub const BCJ2_FILTER: Self = Self("BCJ2", Self::ID_BCJ2);

    // Lookup table of every named method, scanned by `by_id`.
    const ENCODING_METHODS: &'static [&'static EncoderMethod] = &[
        &Self::COPY,
        &Self::LZMA,
        &Self::LZMA2,
        &Self::PPMD,
        &Self::BZIP2,
        &Self::ZSTD,
        &Self::BROTLI,
        &Self::LZ4,
        &Self::LZS,
        &Self::LIZARD,
        &Self::DEFLATE,
        &Self::DEFLATE64,
        &Self::AES256_SHA256,
        &Self::BCJ_X86_FILTER,
        &Self::BCJ_PPC_FILTER,
        &Self::BCJ_IA64_FILTER,
        &Self::BCJ_ARM_FILTER,
        &Self::BCJ_ARM64_FILTER,
        &Self::BCJ_ARM_THUMB_FILTER,
        &Self::BCJ_SPARC_FILTER,
        &Self::BCJ_RISCV_FILTER,
        &Self::DELTA_FILTER,
        &Self::BCJ2_FILTER,
    ];

    /// Returns the human-readable name of this encoder method.
    #[inline]
    pub const fn name(&self) -> &'static str {
        let Self(name, _) = *self;
        name
    }

    /// Returns the binary ID of this encoder method.
    #[inline]
    pub const fn id(&self) -> &'static [u8] {
        let Self(_, id) = *self;
        id
    }

    /// Looks up the named encoder method whose binary ID equals `id`.
    ///
    /// Returns `None` when no known method has that ID.
    ///
    /// # Arguments
    /// * `id` - The binary method ID to search for
    #[inline]
    pub fn by_id(id: &[u8]) -> Option<Self> {
        for method in Self::ENCODING_METHODS {
            if method.1 == id {
                return Some(**method);
            }
        }
        None
    }
}
442
/// Lookup tables tying together the files, blocks and pack streams of an archive.
///
/// The tables record, per block, where its pack streams and files start and,
/// per file, which block (if any) holds its data.
#[derive(Debug, Default, Clone)]
pub struct StreamMap {
    /// Index of the first pack stream for each block.
    pub(crate) block_first_pack_stream_index: Vec<usize>,
    /// Offset of each pack stream.
    pub(crate) pack_stream_offsets: Vec<u64>,
    /// Index of the first file for each block.
    pub block_first_file_index: Vec<usize>,
    /// Block index for each file (`None` if the file has no data).
    pub file_block_index: Vec<Option<usize>>,
}
456
/// Location, size and checksum of the archive's next header.
#[derive(Debug, Clone, Copy)]
pub(crate) struct StartHeader {
    /// Offset of the next header.
    pub(crate) next_header_offset: u64,
    /// Size of the next header.
    pub(crate) next_header_size: u64,
    /// CRC of the next header.
    pub(crate) next_header_crc: u64,
}