async_sevenz/
archive.rs

1#[cfg(feature = "compress")]
2use crate::encoder_options::EncoderOptions;
3use crate::{NtTime, bitset::BitSet, block::*};
4
/// Size of the signature header, in bytes.
pub(crate) const SIGNATURE_HEADER_SIZE: u64 = 32;
/// Six-byte archive signature: ASCII `7z` followed by `BC AF 27 1C`.
pub(crate) const SEVEN_Z_SIGNATURE: &[u8] = b"7z\xBC\xAF\x27\x1C";

// Single-byte identifiers, numbered consecutively from 0x00 to 0x19.
// NOTE(review): the names mirror the property IDs of the 7z header format
// (kEnd, kHeader, ...) - confirm against the reader/writer that consumes them.
pub(crate) const K_END: u8 = 0x00;
pub(crate) const K_HEADER: u8 = 0x01;
pub(crate) const K_ARCHIVE_PROPERTIES: u8 = 0x02;
pub(crate) const K_ADDITIONAL_STREAMS_INFO: u8 = 0x03;
pub(crate) const K_MAIN_STREAMS_INFO: u8 = 0x04;
pub(crate) const K_FILES_INFO: u8 = 0x05;
pub(crate) const K_PACK_INFO: u8 = 0x06;
pub(crate) const K_UNPACK_INFO: u8 = 0x07;
pub(crate) const K_SUB_STREAMS_INFO: u8 = 0x08;
pub(crate) const K_SIZE: u8 = 0x09;
pub(crate) const K_CRC: u8 = 0x0A;
pub(crate) const K_FOLDER: u8 = 0x0B;
pub(crate) const K_CODERS_UNPACK_SIZE: u8 = 0x0C;
pub(crate) const K_NUM_UNPACK_STREAM: u8 = 0x0D;
pub(crate) const K_EMPTY_STREAM: u8 = 0x0E;
pub(crate) const K_EMPTY_FILE: u8 = 0x0F;
pub(crate) const K_ANTI: u8 = 0x10;
pub(crate) const K_NAME: u8 = 0x11;
pub(crate) const K_C_TIME: u8 = 0x12;
pub(crate) const K_A_TIME: u8 = 0x13;
pub(crate) const K_M_TIME: u8 = 0x14;
pub(crate) const K_WIN_ATTRIBUTES: u8 = 0x15;

/// Identifier for archive comments.
///
/// TODO: Reading and writing comments is not implemented yet.
pub const K_COMMENT: u8 = 0x16;
pub(crate) const K_ENCODED_HEADER: u8 = 0x17;
pub(crate) const K_START_POS: u8 = 0x18;
pub(crate) const K_DUMMY: u8 = 0x19;
36
37/// Represents a parsed 7z archive structure.
38///
39/// Contains metadata about the archive including files, compression blocks,
40/// and internal structure information necessary for decompression.
41#[derive(Debug, Default, Clone)]
42pub struct Archive {
43    /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
44    pub(crate) pack_pos: u64,
45    pub(crate) pack_sizes: Vec<u64>,
46    pub(crate) pack_crcs_defined: BitSet,
47    pub(crate) pack_crcs: Vec<u64>,
48    pub(crate) sub_streams_info: Option<SubStreamsInfo>,
49    /// Compression blocks in the archive.
50    pub blocks: Vec<Block>,
51    /// File and directory entries in the archive.
52    pub files: Vec<ArchiveEntry>,
53    /// Mapping between files, blocks, and pack streams.
54    pub stream_map: StreamMap,
55    /// Whether this is a solid archive (better compression, slower random access).
56    pub is_solid: bool,
57}
58
59#[derive(Debug, Default, Clone)]
60pub(crate) struct SubStreamsInfo {
61    pub(crate) unpack_sizes: Vec<u64>,
62    pub(crate) has_crc: BitSet,
63    pub(crate) crcs: Vec<u64>,
64}
65
66/// Represents a single file or directory entry within a 7z archive.
67///
68/// Contains metadata about the entry including name, timestamps, attributes,
69/// and size information.
70#[derive(Debug, Default, Clone)]
71pub struct ArchiveEntry {
72    /// Name/path of the entry within the archive.
73    pub name: String,
74    /// Whether this entry has associated data stream.
75    pub has_stream: bool,
76    /// Whether this entry is a directory.
77    pub is_directory: bool,
78    /// Whether this is an anti-item (used for deletion in updates).
79    pub is_anti_item: bool,
80    /// Whether creation date is present.
81    pub has_creation_date: bool,
82    /// Whether last modified date is present.
83    pub has_last_modified_date: bool,
84    /// Whether access date is present.
85    pub has_access_date: bool,
86    /// Creation date and time.
87    pub creation_date: NtTime,
88    /// Last modified date and time.
89    pub last_modified_date: NtTime,
90    /// Last access date and time.
91    pub access_date: NtTime,
92    /// Whether Windows file attributes are present.
93    pub has_windows_attributes: bool,
94    /// Windows file attributes.
95    pub windows_attributes: u32,
96    /// Whether CRC is present.
97    pub has_crc: bool,
98    /// CRC32 checksum of uncompressed data.
99    pub crc: u64,
100    /// CRC32 checksum of compressed data.
101    pub compressed_crc: u64,
102    /// Uncompressed size in bytes.
103    pub size: u64,
104    /// Compressed size in bytes.
105    pub compressed_size: u64,
106}
107
108impl ArchiveEntry {
109    /// Creates a new default archive entry.
110    pub fn new() -> Self {
111        Self::default()
112    }
113
114    /// Creates a new archive entry representing a file.
115    ///
116    /// # Arguments
117    /// * `entry_name` - The name/path of the file within the archive
118    pub fn new_file(entry_name: &str) -> Self {
119        Self {
120            name: entry_name.to_string(),
121            has_stream: true,
122            is_directory: false,
123            ..Default::default()
124        }
125    }
126
127    /// Creates a new archive entry representing a directory.
128    ///
129    /// # Arguments
130    /// * `entry_name` - The name/path of the directory within the archive
131    pub fn new_directory(entry_name: &str) -> Self {
132        Self {
133            name: entry_name.to_string(),
134            has_stream: false,
135            is_directory: true,
136            ..Default::default()
137        }
138    }
139
140    /// Creates a new archive entry from a filesystem path.
141    ///
142    /// Automatically extracts metadata like timestamps and attributes from the filesystem.
143    /// On Windows, backslashes in the entry name are converted to forward slashes.
144    ///
145    /// # Arguments
146    /// * `path` - The filesystem path to extract metadata from
147    /// * `entry_name` - The name/path to use for this entry within the archive
148    pub async fn from_path(path: impl AsRef<std::path::Path>, entry_name: String) -> Self {
149        let path = path.as_ref();
150        #[cfg(target_os = "windows")]
151        let entry_name = {
152            let mut name_bytes = entry_name.into_bytes();
153            for b in &mut name_bytes {
154                if *b == b'\\' {
155                    *b = b'/';
156                }
157            }
158            String::from_utf8(name_bytes).unwrap()
159        };
160        let mut entry = ArchiveEntry {
161            name: entry_name,
162            has_stream: path.is_file(),
163            is_directory: path.is_dir(),
164            ..Default::default()
165        };
166
167        if let Ok(meta) = async_fs::metadata(path).await {
168            if let Ok(modified) = meta.modified() {
169                if let Ok(date) = NtTime::try_from(modified) {
170                    entry.last_modified_date = date;
171                    entry.has_last_modified_date = entry.last_modified_date.0 > 0;
172                }
173            }
174            if let Ok(date) = meta.created() {
175                if let Ok(date) = NtTime::try_from(date) {
176                    entry.creation_date = date;
177                    entry.has_creation_date = entry.creation_date.0 > 0;
178                }
179            }
180            if let Ok(date) = meta.accessed() {
181                if let Ok(date) = NtTime::try_from(date) {
182                    entry.access_date = date;
183                    entry.has_access_date = entry.access_date.0 > 0;
184                }
185            }
186        }
187        entry
188    }
189
190    /// Returns the name/path of this entry within the archive.
191    pub fn name(&self) -> &str {
192        self.name.as_ref()
193    }
194
195    /// Returns whether this entry is a directory.
196    pub fn is_directory(&self) -> bool {
197        self.is_directory
198    }
199
200    /// Returns whether this entry has an associated data stream.
201    pub fn has_stream(&self) -> bool {
202        self.has_stream
203    }
204
205    /// Returns the creation date of this entry.
206    pub fn creation_date(&self) -> NtTime {
207        self.creation_date
208    }
209
210    /// Returns the last modified date of this entry.
211    pub fn last_modified_date(&self) -> NtTime {
212        self.last_modified_date
213    }
214
215    /// Returns the uncompressed size of this entry in bytes.
216    pub fn size(&self) -> u64 {
217        self.size
218    }
219
220    /// Returns the Windows file attributes of this entry.
221    pub fn windows_attributes(&self) -> u32 {
222        self.windows_attributes
223    }
224
225    /// Returns the last access date of this entry.
226    pub fn access_date(&self) -> NtTime {
227        self.access_date
228    }
229
230    /// Returns whether this entry is an anti-item (used for deletion in updates).
231    pub fn is_anti_item(&self) -> bool {
232        self.is_anti_item
233    }
234}
235
/// Configuration for encoding methods when compressing data.
///
/// Pairs an [`EncoderMethod`] with optional encoder-specific options. The
/// derived `Default` uses `EncoderMethod::default()` and no options.
///
/// `Clone` is derived: it copies `method` and clones `options`, exactly what a
/// field-by-field hand-written implementation would do.
#[cfg(feature = "compress")]
#[derive(Debug, Default, Clone)]
pub struct EncoderConfiguration {
    /// The encoder method to use.
    pub method: EncoderMethod,
    /// Optional encoder-specific options.
    pub options: Option<EncoderOptions>,
}

/// Converts a bare method into a configuration without options.
#[cfg(feature = "compress")]
impl From<EncoderMethod> for EncoderConfiguration {
    fn from(value: EncoderMethod) -> Self {
        Self::new(value)
    }
}
264
#[cfg(feature = "compress")]
impl EncoderConfiguration {
    /// Builds a configuration for `method` that carries no encoder-specific options.
    ///
    /// # Arguments
    /// * `method` - The encoder method to use
    pub fn new(method: EncoderMethod) -> Self {
        let options = None;
        Self { method, options }
    }

    /// Returns this configuration with `options` attached, replacing any
    /// options that were set before.
    ///
    /// # Arguments
    /// * `options` - The encoder options to apply
    pub fn with_options(self, options: EncoderOptions) -> Self {
        Self {
            options: Some(options),
            ..self
        }
    }
}
287
/// One stage of an encoding chain: a filter, a compression or an encryption method.
///
/// Pairs a human-readable name with the method's binary ID. The derived
/// `Default` yields an empty name and an empty ID, which none of the
/// predefined methods use.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct EncoderMethod(&'static str, &'static [u8]);

impl EncoderMethod {
    /// Binary ID of COPY (no compression).
    pub const ID_COPY: &'static [u8] = &[0x00];
    /// Binary ID of the Delta filter.
    pub const ID_DELTA: &'static [u8] = &[0x03];

    /// Binary ID of LZMA compression.
    pub const ID_LZMA: &'static [u8] = &[0x03, 0x01, 0x01];
    /// Binary ID of the BCJ x86 filter.
    pub const ID_BCJ_X86: &'static [u8] = &[0x03, 0x03, 0x01, 0x03];
    /// Binary ID of the BCJ2 filter.
    pub const ID_BCJ2: &'static [u8] = &[0x03, 0x03, 0x01, 0x1B];
    /// Binary ID of the BCJ PowerPC filter.
    pub const ID_BCJ_PPC: &'static [u8] = &[0x03, 0x03, 0x02, 0x05];
    /// Binary ID of the BCJ IA64 filter.
    pub const ID_BCJ_IA64: &'static [u8] = &[0x03, 0x03, 0x04, 0x01];
    /// Binary ID of the BCJ ARM filter.
    pub const ID_BCJ_ARM: &'static [u8] = &[0x03, 0x03, 0x05, 0x01];
    /// Binary ID of the BCJ ARM64 filter.
    pub const ID_BCJ_ARM64: &'static [u8] = &[0x0A];
    /// Binary ID of the BCJ ARM Thumb filter.
    pub const ID_BCJ_ARM_THUMB: &'static [u8] = &[0x03, 0x03, 0x07, 0x01];
    /// Binary ID of the BCJ SPARC filter.
    pub const ID_BCJ_SPARC: &'static [u8] = &[0x03, 0x03, 0x08, 0x05];
    /// Binary ID of the BCJ RISC-V filter.
    pub const ID_BCJ_RISCV: &'static [u8] = &[0x0B];
    /// Binary ID of PPMD compression.
    pub const ID_PPMD: &'static [u8] = &[0x03, 0x04, 0x01];

    /// Binary ID of LZMA2 compression.
    pub const ID_LZMA2: &'static [u8] = &[0x21];
    /// Binary ID of BZIP2 compression.
    pub const ID_BZIP2: &'static [u8] = &[0x04, 0x02, 0x02];
    /// Binary ID of Zstandard compression.
    pub const ID_ZSTD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x01];
    /// Binary ID of Brotli compression.
    pub const ID_BROTLI: &'static [u8] = &[0x04, 0xF7, 0x11, 0x02];
    /// Binary ID of LZ4 compression.
    pub const ID_LZ4: &'static [u8] = &[0x04, 0xF7, 0x11, 0x04];
    /// Binary ID of LZS compression.
    pub const ID_LZS: &'static [u8] = &[0x04, 0xF7, 0x11, 0x05];
    /// Binary ID of Lizard compression.
    pub const ID_LIZARD: &'static [u8] = &[0x04, 0xF7, 0x11, 0x06];
    /// Binary ID of Deflate compression.
    pub const ID_DEFLATE: &'static [u8] = &[0x04, 0x01, 0x08];
    /// Binary ID of Deflate64 compression.
    pub const ID_DEFLATE64: &'static [u8] = &[0x04, 0x01, 0x09];
    /// Binary ID of AES256-SHA256 encryption.
    pub const ID_AES256_SHA256: &'static [u8] = &[0x06, 0xF1, 0x07, 0x01];

    /// COPY: stores data without compressing it.
    pub const COPY: Self = EncoderMethod("COPY", Self::ID_COPY);
    /// LZMA compression.
    pub const LZMA: Self = EncoderMethod("LZMA", Self::ID_LZMA);
    /// LZMA2 compression.
    pub const LZMA2: Self = EncoderMethod("LZMA2", Self::ID_LZMA2);
    /// PPMD compression.
    pub const PPMD: Self = EncoderMethod("PPMD", Self::ID_PPMD);
    /// BZIP2 compression.
    pub const BZIP2: Self = EncoderMethod("BZIP2", Self::ID_BZIP2);
    /// Zstandard compression.
    pub const ZSTD: Self = EncoderMethod("ZSTD", Self::ID_ZSTD);
    /// Brotli compression.
    pub const BROTLI: Self = EncoderMethod("BROTLI", Self::ID_BROTLI);
    /// LZ4 compression.
    pub const LZ4: Self = EncoderMethod("LZ4", Self::ID_LZ4);
    /// LZS compression.
    pub const LZS: Self = EncoderMethod("LZS", Self::ID_LZS);
    /// Lizard compression.
    pub const LIZARD: Self = EncoderMethod("LIZARD", Self::ID_LIZARD);
    /// Deflate compression.
    pub const DEFLATE: Self = EncoderMethod("DEFLATE", Self::ID_DEFLATE);
    /// Deflate64 compression.
    pub const DEFLATE64: Self = EncoderMethod("DEFLATE64", Self::ID_DEFLATE64);
    /// AES256-SHA256 encryption.
    pub const AES256_SHA256: Self = EncoderMethod("AES256_SHA256", Self::ID_AES256_SHA256);

    /// BCJ x86 filter.
    pub const BCJ_X86_FILTER: Self = EncoderMethod("BCJ_X86", Self::ID_BCJ_X86);
    /// BCJ PowerPC filter.
    pub const BCJ_PPC_FILTER: Self = EncoderMethod("BCJ_PPC", Self::ID_BCJ_PPC);
    /// BCJ IA64 filter.
    pub const BCJ_IA64_FILTER: Self = EncoderMethod("BCJ_IA64", Self::ID_BCJ_IA64);
    /// BCJ ARM filter.
    pub const BCJ_ARM_FILTER: Self = EncoderMethod("BCJ_ARM", Self::ID_BCJ_ARM);
    /// BCJ ARM64 filter.
    pub const BCJ_ARM64_FILTER: Self = EncoderMethod("BCJ_ARM64", Self::ID_BCJ_ARM64);
    /// BCJ ARM Thumb filter.
    pub const BCJ_ARM_THUMB_FILTER: Self =
        EncoderMethod("BCJ_ARM_THUMB", Self::ID_BCJ_ARM_THUMB);
    /// BCJ SPARC filter.
    pub const BCJ_SPARC_FILTER: Self = EncoderMethod("BCJ_SPARC", Self::ID_BCJ_SPARC);
    /// BCJ RISC-V filter.
    pub const BCJ_RISCV_FILTER: Self = EncoderMethod("BCJ_RISCV", Self::ID_BCJ_RISCV);
    /// Delta filter.
    pub const DELTA_FILTER: Self = EncoderMethod("DELTA", Self::ID_DELTA);
    /// BCJ2 filter.
    pub const BCJ2_FILTER: Self = EncoderMethod("BCJ2", Self::ID_BCJ2);

    // Every predefined method; this is the table `by_id` searches.
    const ENCODING_METHODS: &'static [&'static EncoderMethod] = &[
        &Self::COPY,
        &Self::LZMA,
        &Self::LZMA2,
        &Self::PPMD,
        &Self::BZIP2,
        &Self::ZSTD,
        &Self::BROTLI,
        &Self::LZ4,
        &Self::LZS,
        &Self::LIZARD,
        &Self::DEFLATE,
        &Self::DEFLATE64,
        &Self::AES256_SHA256,
        &Self::BCJ_X86_FILTER,
        &Self::BCJ_PPC_FILTER,
        &Self::BCJ_IA64_FILTER,
        &Self::BCJ_ARM_FILTER,
        &Self::BCJ_ARM64_FILTER,
        &Self::BCJ_ARM_THUMB_FILTER,
        &Self::BCJ_SPARC_FILTER,
        &Self::BCJ_RISCV_FILTER,
        &Self::DELTA_FILTER,
        &Self::BCJ2_FILTER,
    ];

    /// Returns the human-readable name of this encoder method.
    #[inline]
    pub const fn name(&self) -> &'static str {
        self.0
    }

    /// Returns the binary ID of this encoder method.
    #[inline]
    pub const fn id(&self) -> &'static [u8] {
        self.1
    }

    /// Looks up a predefined encoder method by its binary ID.
    ///
    /// Returns `None` when `id` does not belong to any predefined method.
    ///
    /// # Arguments
    /// * `id` - The binary method ID to search for
    #[inline]
    pub fn by_id(id: &[u8]) -> Option<Self> {
        Self::ENCODING_METHODS
            .iter()
            .map(|method| **method)
            .find(|method| method.1 == id)
    }
}
441
/// Lookup tables tying files, blocks and pack streams of an archive together.
///
/// Keeps track of which compression block and which packed data stream
/// belong to each archive entry.
#[derive(Clone, Debug, Default)]
pub struct StreamMap {
    /// Going by the name, the index of the first pack stream of each block.
    pub(crate) block_first_pack_stream_index: Vec<usize>,
    /// Going by the name, the offset of each pack stream.
    pub(crate) pack_stream_offsets: Vec<u64>,
    /// For each block, the index of its first file.
    pub block_first_file_index: Vec<usize>,
    /// For each file, the index of its block (`None` if the file has no data).
    pub file_block_index: Vec<Option<usize>>,
}
455
/// Offset, size and CRC of the next header.
#[derive(Clone, Copy, Debug)]
pub(crate) struct StartHeader {
    /// Offset of the next header.
    pub(crate) next_header_offset: u64,
    /// Size of the next header.
    pub(crate) next_header_size: u64,
    /// CRC of the next header.
    pub(crate) next_header_crc: u64,
}