sevenz_rust2/
archive.rs

1#[cfg(feature = "compress")]
2use crate::encoder_options::EncoderOptions;
3use crate::{NtTime, bitset::BitSet, block::*};
4
/// Size of the signature header (32; presumably bytes).
///
/// `Archive::pack_pos` is documented as being relative to the beginning of the
/// file plus this value.
pub(crate) const SIGNATURE_HEADER_SIZE: u64 = 32;
/// Six signature bytes: ASCII `7z` followed by `BC AF 27 1C`.
///
/// NOTE(review): presumably the magic expected at the start of an archive —
/// the code that checks it is not in this file section.
pub(crate) const SEVEN_Z_SIGNATURE: &[u8] = &[b'7', b'z', 0xBC, 0xAF, 0x27, 0x1C];

// Single-byte tags, numbered consecutively from `K_END` (0x00) to `K_DUMMY` (0x19).
// NOTE(review): the names line up with the property IDs of the 7z header format
// (kEnd, kHeader, kPackInfo, ...); the code that reads and writes them is not
// visible here, so confirm against the reader/writer before relying on that.
pub(crate) const K_END: u8 = 0x00;
pub(crate) const K_HEADER: u8 = 0x01;
pub(crate) const K_ARCHIVE_PROPERTIES: u8 = 0x02;
pub(crate) const K_ADDITIONAL_STREAMS_INFO: u8 = 0x03;
pub(crate) const K_MAIN_STREAMS_INFO: u8 = 0x04;
pub(crate) const K_FILES_INFO: u8 = 0x05;
pub(crate) const K_PACK_INFO: u8 = 0x06;
pub(crate) const K_UNPACK_INFO: u8 = 0x07;
pub(crate) const K_SUB_STREAMS_INFO: u8 = 0x08;
pub(crate) const K_SIZE: u8 = 0x09;
pub(crate) const K_CRC: u8 = 0x0A;
pub(crate) const K_FOLDER: u8 = 0x0B;
pub(crate) const K_CODERS_UNPACK_SIZE: u8 = 0x0C;
pub(crate) const K_NUM_UNPACK_STREAM: u8 = 0x0D;
pub(crate) const K_EMPTY_STREAM: u8 = 0x0E;
pub(crate) const K_EMPTY_FILE: u8 = 0x0F;
pub(crate) const K_ANTI: u8 = 0x10;
pub(crate) const K_NAME: u8 = 0x11;
pub(crate) const K_C_TIME: u8 = 0x12;
pub(crate) const K_A_TIME: u8 = 0x13;
pub(crate) const K_M_TIME: u8 = 0x14;
pub(crate) const K_WIN_ATTRIBUTES: u8 = 0x15;

/// TODO: Implement reading & writing comments
// The lint is silenced because, per the TODO above, comment support is not
// implemented yet, so this tag is presumably not referenced anywhere.
#[allow(unused)]
pub(crate) const K_COMMENT: u8 = 0x16;
pub(crate) const K_ENCODED_HEADER: u8 = 0x17;
pub(crate) const K_START_POS: u8 = 0x18;
pub(crate) const K_DUMMY: u8 = 0x19;
37
/// Represents a parsed 7z archive structure.
///
/// Contains metadata about the archive including files, compression blocks,
/// and internal structure information necessary for decompression.
///
/// The derived `Default` yields an archive with no pack streams, blocks or files.
#[derive(Debug, Default, Clone)]
pub struct Archive {
    /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
    pub(crate) pack_pos: u64,
    /// Sizes of the packed streams (presumably in bytes, one entry per pack
    /// stream — confirm against the header reader).
    pub(crate) pack_sizes: Vec<u64>,
    /// Bit set flagging which entries of `pack_crcs` are defined (presumably
    /// indexed by pack stream — confirm against the header reader).
    pub(crate) pack_crcs_defined: BitSet,
    /// CRC values of the packed streams, stored as `u64`.
    pub(crate) pack_crcs: Vec<u64>,
    /// Sub-stream sizes and CRCs; `None` when no such information is attached.
    pub(crate) sub_streams_info: Option<SubStreamsInfo>,
    /// Compression blocks in the archive.
    pub blocks: Vec<Block>,
    /// File and directory entries in the archive.
    pub files: Vec<ArchiveEntry>,
    /// Mapping between files, blocks, and pack streams.
    pub stream_map: StreamMap,
    /// Whether this is a solid archive (better compression, slower random access).
    pub is_solid: bool,
}
59
/// Sizes and CRC data for sub-streams, stored optionally on [`Archive`] as
/// `sub_streams_info`.
///
/// NOTE(review): presumably mirrors the "SubStreams Info" section of the 7z
/// header and holds one entry per sub-stream — confirm against the reader.
#[derive(Debug, Default, Clone)]
pub(crate) struct SubStreamsInfo {
    /// Unpacked sizes of the sub-streams (presumably in bytes).
    pub(crate) unpack_sizes: Vec<u64>,
    /// Bit set flagging which sub-streams carry a CRC (presumably indexes `crcs`).
    pub(crate) has_crc: BitSet,
    /// CRC values of the sub-streams, stored as `u64`.
    pub(crate) crcs: Vec<u64>,
}
66
/// Represents a single file or directory entry within a 7z archive.
///
/// Contains metadata about the entry including name, timestamps, attributes,
/// and size information.
///
/// Optional values are modelled as a `has_*` flag next to the value field; the
/// value is only meaningful when its flag is `true`. The derived `Default`
/// leaves every flag `false`.
#[derive(Debug, Default, Clone)]
pub struct ArchiveEntry {
    /// Name/path of the entry within the archive.
    pub name: String,
    /// Whether this entry has associated data stream.
    pub has_stream: bool,
    /// Whether this entry is a directory.
    pub is_directory: bool,
    /// Whether this is an anti-item (used for deletion in updates).
    pub is_anti_item: bool,
    /// Whether creation date is present.
    pub has_creation_date: bool,
    /// Whether last modified date is present.
    pub has_last_modified_date: bool,
    /// Whether access date is present.
    pub has_access_date: bool,
    /// Creation date and time (see `has_creation_date`).
    pub creation_date: NtTime,
    /// Last modified date and time (see `has_last_modified_date`).
    pub last_modified_date: NtTime,
    /// Last access date and time (see `has_access_date`).
    pub access_date: NtTime,
    /// Whether Windows file attributes are present.
    pub has_windows_attributes: bool,
    /// Windows file attributes (see `has_windows_attributes`).
    pub windows_attributes: u32,
    /// Whether CRC is present.
    pub has_crc: bool,
    /// CRC32 checksum of uncompressed data, held in a `u64`.
    pub crc: u64,
    /// CRC32 checksum of compressed data, held in a `u64`.
    pub compressed_crc: u64,
    /// Uncompressed size in bytes.
    pub size: u64,
    /// Compressed size in bytes.
    pub compressed_size: u64,
}
108
109impl ArchiveEntry {
110    /// Creates a new default archive entry.
111    pub fn new() -> Self {
112        Self::default()
113    }
114
115    /// Creates a new archive entry representing a file.
116    ///
117    /// # Arguments
118    /// * `entry_name` - The name/path of the file within the archive
119    pub fn new_file(entry_name: &str) -> Self {
120        Self {
121            name: entry_name.to_string(),
122            has_stream: true,
123            is_directory: false,
124            ..Default::default()
125        }
126    }
127
128    /// Creates a new archive entry representing a directory.
129    ///
130    /// # Arguments
131    /// * `entry_name` - The name/path of the directory within the archive
132    pub fn new_directory(entry_name: &str) -> Self {
133        Self {
134            name: entry_name.to_string(),
135            has_stream: false,
136            is_directory: true,
137            ..Default::default()
138        }
139    }
140
141    /// Creates a new archive entry from a filesystem path.
142    ///
143    /// Automatically extracts metadata like timestamps and attributes from the filesystem.
144    /// On Windows, backslashes in the entry name are converted to forward slashes.
145    ///
146    /// # Arguments
147    /// * `path` - The filesystem path to extract metadata from
148    /// * `entry_name` - The name/path to use for this entry within the archive
149    pub fn from_path(path: impl AsRef<std::path::Path>, entry_name: String) -> Self {
150        let path = path.as_ref();
151        #[cfg(target_os = "windows")]
152        let entry_name = {
153            let mut name_bytes = entry_name.into_bytes();
154            for b in &mut name_bytes {
155                if *b == b'\\' {
156                    *b = b'/';
157                }
158            }
159            String::from_utf8(name_bytes).unwrap()
160        };
161        let mut entry = ArchiveEntry {
162            name: entry_name,
163            has_stream: path.is_file(),
164            is_directory: path.is_dir(),
165            ..Default::default()
166        };
167
168        if let Ok(meta) = path.metadata() {
169            if let Ok(modified) = meta.modified() {
170                if let Ok(date) = NtTime::try_from(modified) {
171                    entry.last_modified_date = date;
172                    entry.has_last_modified_date = entry.last_modified_date.0 > 0;
173                }
174            }
175            if let Ok(date) = meta.created() {
176                if let Ok(date) = NtTime::try_from(date) {
177                    entry.creation_date = date;
178                    entry.has_creation_date = entry.creation_date.0 > 0;
179                }
180            }
181            if let Ok(date) = meta.accessed() {
182                if let Ok(date) = NtTime::try_from(date) {
183                    entry.access_date = date;
184                    entry.has_access_date = entry.access_date.0 > 0;
185                }
186            }
187        }
188        entry
189    }
190
191    /// Returns the name/path of this entry within the archive.
192    pub fn name(&self) -> &str {
193        self.name.as_ref()
194    }
195
196    /// Returns whether this entry is a directory.
197    pub fn is_directory(&self) -> bool {
198        self.is_directory
199    }
200
201    /// Returns whether this entry has an associated data stream.
202    pub fn has_stream(&self) -> bool {
203        self.has_stream
204    }
205
206    /// Returns the creation date of this entry.
207    pub fn creation_date(&self) -> NtTime {
208        self.creation_date
209    }
210
211    /// Returns the last modified date of this entry.
212    pub fn last_modified_date(&self) -> NtTime {
213        self.last_modified_date
214    }
215
216    /// Returns the uncompressed size of this entry in bytes.
217    pub fn size(&self) -> u64 {
218        self.size
219    }
220
221    /// Returns the Windows file attributes of this entry.
222    pub fn windows_attributes(&self) -> u32 {
223        self.windows_attributes
224    }
225
226    /// Returns the last access date of this entry.
227    pub fn access_date(&self) -> NtTime {
228        self.access_date
229    }
230
231    /// Returns whether this entry is an anti-item (used for deletion in updates).
232    pub fn is_anti_item(&self) -> bool {
233        self.is_anti_item
234    }
235}
236
/// Configuration for encoding methods when compressing data.
///
/// Combines an encoder method with optional encoder-specific options.
///
/// Only compiled when the `compress` feature is enabled. The derived `Default`
/// pairs `EncoderMethod::default()` with no options.
#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
#[cfg(feature = "compress")]
#[derive(Debug, Default)]
pub struct EncoderConfiguration {
    /// The encoder method to use.
    pub method: EncoderMethod,
    /// Optional encoder-specific options; `None` when no options were supplied.
    pub options: Option<EncoderOptions>,
}
249
#[cfg(feature = "compress")]
impl From<EncoderMethod> for EncoderConfiguration {
    /// Wraps a bare encoder method in a configuration that carries no options.
    fn from(method: EncoderMethod) -> Self {
        EncoderConfiguration::new(method)
    }
}
256
#[cfg(feature = "compress")]
impl Clone for EncoderConfiguration {
    /// Produces an independent copy: the method is copied, the options are cloned.
    fn clone(&self) -> Self {
        // Destructuring keeps this impl in sync with the struct: adding a field
        // without handling it here becomes a compile error.
        let Self { method, options } = self;
        Self {
            method: *method,
            options: options.clone(),
        }
    }
}
266
#[cfg(feature = "compress")]
impl EncoderConfiguration {
    /// Builds a configuration for `method` without any encoder-specific options.
    ///
    /// # Arguments
    /// * `method` - The encoder method to use
    pub fn new(method: EncoderMethod) -> Self {
        let options = None;
        Self { method, options }
    }

    /// Returns this configuration with `options` attached, replacing any
    /// options that were set before.
    ///
    /// # Arguments
    /// * `options` - The encoder options to apply
    pub fn with_options(self, options: EncoderOptions) -> Self {
        let mut configured = self;
        configured.options = Some(options);
        configured
    }
}
289
/// Encoder method that can be chained (filter, compression and encryption).
///
/// Each value pairs a human-readable name with the method's binary ID. The
/// predefined methods are exposed as associated constants. The derived
/// `Default` has an empty name and an empty ID, which matches none of them.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Default, Hash)]
pub struct EncoderMethod(&'static str, &'static [u8]);

impl EncoderMethod {
    /// Binary ID of the COPY method (no compression).
    pub const ID_COPY: &'static [u8] = &[0x00];
    /// Binary ID of the Delta filter.
    pub const ID_DELTA: &'static [u8] = &[0x03];

    /// Binary ID of LZMA compression.
    pub const ID_LZMA: &'static [u8] = &[0x03, 0x01, 0x01];
    /// Binary ID of the BCJ x86 filter.
    pub const ID_BCJ_X86: &'static [u8] = &[0x03, 0x03, 0x01, 0x03];
    /// Binary ID of the BCJ2 filter.
    pub const ID_BCJ2: &'static [u8] = &[0x03, 0x03, 0x01, 0x1B];
    /// Binary ID of the BCJ PowerPC filter.
    pub const ID_BCJ_PPC: &'static [u8] = &[0x03, 0x03, 0x02, 0x05];
    /// Binary ID of the BCJ IA64 filter.
    pub const ID_BCJ_IA64: &'static [u8] = &[0x03, 0x03, 0x04, 0x01];
    /// Binary ID of the BCJ ARM filter.
    pub const ID_BCJ_ARM: &'static [u8] = &[0x03, 0x03, 0x05, 0x01];
    /// Binary ID of the BCJ ARM64 filter.
    pub const ID_BCJ_ARM64: &'static [u8] = &[0xA];
    /// Binary ID of the BCJ ARM Thumb filter.
    pub const ID_BCJ_ARM_THUMB: &'static [u8] = &[0x03, 0x03, 0x07, 0x01];
    /// Binary ID of the BCJ SPARC filter.
    pub const ID_BCJ_SPARC: &'static [u8] = &[0x03, 0x03, 0x08, 0x05];
    /// Binary ID of PPMD compression.
    pub const ID_PPMD: &'static [u8] = &[0x03, 0x04, 0x01];

    /// Binary ID of LZMA2 compression.
    pub const ID_LZMA2: &'static [u8] = &[0x21];
    /// Binary ID of BZIP2 compression.
    pub const ID_BZIP2: &'static [u8] = &[0x04, 0x02, 0x02];
    /// Binary ID of Zstandard compression.
    pub const ID_ZSTD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x01];
    /// Binary ID of Brotli compression.
    pub const ID_BROTLI: &'static [u8] = &[0x04, 0xF7, 0x11, 0x02];
    /// Binary ID of LZ4 compression.
    pub const ID_LZ4: &'static [u8] = &[0x04, 0xF7, 0x11, 0x04];
    /// Binary ID of LZS compression.
    pub const ID_LZS: &'static [u8] = &[0x04, 0xF7, 0x11, 0x05];
    /// Binary ID of Lizard compression.
    pub const ID_LIZARD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x06];
    /// Binary ID of Deflate compression.
    pub const ID_DEFLATE: &'static [u8] = &[0x04, 0x01, 0x08];
    /// Binary ID of Deflate64 compression.
    pub const ID_DEFLATE64: &'static [u8] = &[0x04, 0x01, 0x09];
    /// Binary ID of AES256-SHA256 encryption.
    pub const ID_AES256SHA256: &'static [u8] = &[0x06, 0xF1, 0x07, 0x01];

    /// The COPY method (no compression).
    pub const COPY: Self = Self("COPY", Self::ID_COPY);
    /// The LZMA compression method.
    pub const LZMA: Self = Self("LZMA", Self::ID_LZMA);
    /// The LZMA2 compression method.
    pub const LZMA2: Self = Self("LZMA2", Self::ID_LZMA2);
    /// The PPMD compression method.
    pub const PPMD: Self = Self("PPMD", Self::ID_PPMD);
    /// The BZIP2 compression method.
    pub const BZIP2: Self = Self("BZIP2", Self::ID_BZIP2);
    /// The Zstandard compression method.
    pub const ZSTD: Self = Self("ZSTD", Self::ID_ZSTD);
    /// The Brotli compression method.
    pub const BROTLI: Self = Self("BROTLI", Self::ID_BROTLI);
    /// The LZ4 compression method.
    pub const LZ4: Self = Self("LZ4", Self::ID_LZ4);
    /// The LZS compression method.
    pub const LZS: Self = Self("LZS", Self::ID_LZS);
    /// The Lizard compression method.
    pub const LIZARD: Self = Self("LIZARD", Self::ID_LIZARD);
    /// The Deflate compression method.
    pub const DEFLATE: Self = Self("DEFLATE", Self::ID_DEFLATE);
    /// The Deflate64 compression method.
    pub const DEFLATE64: Self = Self("DEFLATE64", Self::ID_DEFLATE64);
    /// The AES256-SHA256 encryption method.
    pub const AES256SHA256: Self = Self("AES256SHA256", Self::ID_AES256SHA256);

    /// The BCJ x86 filter method.
    pub const BCJ_X86_FILTER: Self = Self("BCJ_X86", Self::ID_BCJ_X86);
    /// The BCJ PowerPC filter method.
    pub const BCJ_PPC_FILTER: Self = Self("BCJ_PPC", Self::ID_BCJ_PPC);
    /// The BCJ IA64 filter method.
    pub const BCJ_IA64_FILTER: Self = Self("BCJ_IA64", Self::ID_BCJ_IA64);
    /// The BCJ ARM filter method.
    pub const BCJ_ARM_FILTER: Self = Self("BCJ_ARM", Self::ID_BCJ_ARM);
    /// The BCJ ARM64 filter method.
    pub const BCJ_ARM64_FILTER: Self = Self("BCJ_ARM64", Self::ID_BCJ_ARM64);
    /// The BCJ ARM Thumb filter method.
    pub const BCJ_ARM_THUMB_FILTER: Self = Self("BCJ_ARM_THUMB", Self::ID_BCJ_ARM_THUMB);
    /// The BCJ SPARC filter method.
    pub const BCJ_SPARC_FILTER: Self = Self("BCJ_SPARC", Self::ID_BCJ_SPARC);
    /// The Delta filter method.
    pub const DELTA_FILTER: Self = Self("DELTA", Self::ID_DELTA);
    /// The BCJ2 filter method.
    pub const BCJ2_FILTER: Self = Self("BCJ2", Self::ID_BCJ2);

    // Every predefined method; this is the table `by_id` scans, in order.
    const ENCODING_METHODS: &'static [&'static EncoderMethod] = &[
        &Self::COPY,
        &Self::LZMA,
        &Self::LZMA2,
        &Self::PPMD,
        &Self::BZIP2,
        &Self::ZSTD,
        &Self::BROTLI,
        &Self::LZ4,
        &Self::LZS,
        &Self::LIZARD,
        &Self::DEFLATE,
        &Self::DEFLATE64,
        &Self::AES256SHA256,
        &Self::BCJ_X86_FILTER,
        &Self::BCJ_PPC_FILTER,
        &Self::BCJ_IA64_FILTER,
        &Self::BCJ_ARM_FILTER,
        &Self::BCJ_ARM64_FILTER,
        &Self::BCJ_ARM_THUMB_FILTER,
        &Self::BCJ_SPARC_FILTER,
        &Self::DELTA_FILTER,
        &Self::BCJ2_FILTER,
    ];

    /// Returns the human-readable name of this encoder method.
    #[inline]
    pub const fn name(&self) -> &'static str {
        self.0
    }

    /// Returns the binary ID of this encoder method.
    #[inline]
    pub const fn id(&self) -> &'static [u8] {
        self.1
    }

    /// Looks up a predefined encoder method by its binary ID.
    ///
    /// Returns `None` when `id` matches none of the predefined methods; the
    /// whole ID must match exactly.
    ///
    /// # Arguments
    /// * `id` - The binary method ID to search for
    #[inline]
    pub fn by_id(id: &[u8]) -> Option<Self> {
        for &method in Self::ENCODING_METHODS {
            if method.id() == id {
                return Some(*method);
            }
        }
        None
    }
}
438
/// Mapping structure that correlates files, blocks, and pack streams within an archive.
///
/// This structure maintains the relationships between archive entries and their
/// corresponding compression blocks and packed data streams.
///
/// The derived `Default` yields empty tables.
#[derive(Debug, Default, Clone)]
pub struct StreamMap {
    /// Index of the first pack stream for each block (presumably indexed by
    /// block — confirm against the code that builds the map).
    pub(crate) block_first_pack_stream_index: Vec<usize>,
    /// Offset of each pack stream (presumably relative to the start of the
    /// packed data — confirm against the code that builds the map).
    pub(crate) pack_stream_offsets: Vec<u64>,
    /// Index of first file for each block.
    pub block_first_file_index: Vec<usize>,
    /// Block index for each file (None if file has no data).
    pub file_block_index: Vec<Option<usize>>,
}
452
/// Offset, size and CRC of the "next header", grouped as one small `Copy` value.
///
/// NOTE(review): presumably decoded from the fixed-size signature header at the
/// start of the archive — the parsing code is not in this file section.
#[derive(Debug, Clone, Copy)]
pub(crate) struct StartHeader {
    /// Offset of the next header (base position not visible here — confirm).
    pub(crate) next_header_offset: u64,
    /// Size of the next header (presumably in bytes).
    pub(crate) next_header_size: u64,
    /// CRC of the next header, stored as `u64`.
    pub(crate) next_header_crc: u64,
}