stream_unpack/zip/
read_cd.rs

1use thiserror::Error;
2
3use super::{structures::{central_directory::{CentralDirectoryError, CentralDirectory}, cd_location::{CDLD_MAX_SIZE, EndOfCentralDirectory32, CentralDirectoryLocationData, EOCD64_LOCATOR_CONSTANT_SIZE, EOCD64_LOCATOR_SIGNATURE, EndOfCentralDirectory64Locator, EOCD64_CONSTANT_SIZE, EOCD64_SIGNATURE, EndOfCentralDirectory64}}, ZipPosition};
4
5#[derive(Debug, Error)]
6pub enum CentralDirectoryReadError {
7    #[error("failed to map required data spans to disks")]
8    Map,
9
10    #[error("provider returned an error: {0}")]
11    FromProvider(#[from] anyhow::Error),
12
13    #[error("provider returned wrong amount of bytes: expected {0}, got {1}")]
14    ProviderByteCount(usize, usize),
15
16    #[error("failed to find end of central directory")]
17    NoEOCD32,
18
19    #[error("invalid disk sizes")]
20    InvalidDiskSizes,
21
22    #[error("failed to read end of central directory at")]
23    BadEOCD32,
24
25    #[error("failed to read end of zip64 central directory locator")]
26    BadEOCD64Locator,
27
28    #[error("failed to read end of zip64 central directory")]
29    BadEOCD64,
30
31    #[error("failed to decode central directory: {0}")]
32    DecodeCentralDirectory(#[from] CentralDirectoryError),
33}
34
35/// Tries to locate and read a central directory by using the provider callback.
36/// Disk sizes must be provided starting from the first disk (usually .001 or .z01). 
37/// The last disk file is sometimes not labeled with a number
38/// 
39/// The "is_cut" option enables processing "multipart archives" which are not actually
40/// multipart archives, and instead just a regular archive file split into pieces, 
41/// without changing any of the structures
42/// 
43/// The arguments to the provider callback are a [ZipPosition] and length. It is guaranteed
44/// that the length will not exceed the remaining size of the disk
45pub fn from_provider(disk_sizes: impl AsRef<[usize]>, is_cut: bool, provider: impl Fn(ZipPosition, usize) -> Result<Vec<u8>, anyhow::Error>) -> Result<CentralDirectory, CentralDirectoryReadError> {
46    let disk_sizes = disk_sizes.as_ref();
47    let total_size = disk_sizes.iter().sum::<usize>();
48
49    let cdld_offset = total_size - std::cmp::min(total_size, CDLD_MAX_SIZE);
50    let cdld_bytes = make_calls(map_global_to_calls(disk_sizes, cdld_offset, CDLD_MAX_SIZE)?, &provider)?;
51
52    let eocd32_offset = EndOfCentralDirectory32::find_offset(&cdld_bytes)
53        .ok_or(CentralDirectoryReadError::NoEOCD32)?;
54
55    let eocd32 = EndOfCentralDirectory32::from_bytes(&cdld_bytes[(eocd32_offset + 4)..])
56        .ok_or(CentralDirectoryReadError::BadEOCD32)?;
57
58    if eocd32_offset + 4 + eocd32.eocd32_size as usize != cdld_bytes.len() {
59        return Err(CentralDirectoryReadError::InvalidDiskSizes);
60    }
61
62    let cdld = if !eocd32.requires_zip64() || eocd32_offset < 4 + EOCD64_LOCATOR_CONSTANT_SIZE {
63        CentralDirectoryLocationData::from_eocd32(eocd32)
64    } else {
65        let locator_offset = eocd32_offset - 4 - EOCD64_LOCATOR_CONSTANT_SIZE;
66        
67        let locator_signature = u32::from_le_bytes(cdld_bytes[locator_offset..(locator_offset + 4)].try_into().unwrap());
68        if locator_signature != EOCD64_LOCATOR_SIGNATURE {
69            return Err(CentralDirectoryReadError::BadEOCD64Locator);
70        }
71
72        let locator = EndOfCentralDirectory64Locator::from_bytes(&cdld_bytes[(locator_offset + 4)..])
73            .ok_or(CentralDirectoryReadError::BadEOCD64Locator)?;
74
75        let eocd64_pos = ZipPosition::new(
76            locator.eocd64_disk_number as usize, 
77            locator.eocd64_offset as usize
78        );
79
80        let eocd64_bytes = make_calls(map_to_calls(disk_sizes, eocd64_pos, 4 + EOCD64_CONSTANT_SIZE, is_cut)?, &provider)?;
81
82        let eocd64_signature = u32::from_le_bytes(eocd64_bytes[..4].try_into().unwrap());
83        if eocd64_signature != EOCD64_SIGNATURE {
84            return Err(CentralDirectoryReadError::BadEOCD64);
85        }
86
87        let eocd64 = EndOfCentralDirectory64::from_bytes(&eocd64_bytes[4..])
88            .ok_or(CentralDirectoryReadError::BadEOCD64)?;
89
90        CentralDirectoryLocationData::from_eocd64(eocd32, eocd64)
91    };
92
93    let cd_pos = ZipPosition::new(
94        cdld.cd_disk_number as usize,
95        cdld.cd_offset as usize
96    );
97    let cd_bytes = make_calls(map_to_calls(disk_sizes, cd_pos, cdld.cd_size as usize, is_cut)?, &provider)?;
98
99    Ok(CentralDirectory::from_bytes(cd_bytes)?)
100}
101
102#[inline]
103fn make_calls(calls: Vec<(ZipPosition, usize)>, provider: &dyn Fn(ZipPosition, usize) -> Result<Vec<u8>, anyhow::Error>) -> Result<Vec<u8>, CentralDirectoryReadError> {
104    let mut bytes = Vec::new();
105
106    for call in calls {
107        let data = provider(call.0, call.1)?;
108
109        if data.len() != call.1 {
110            return Err(CentralDirectoryReadError::ProviderByteCount(call.1, data.len()));
111        }
112
113        bytes.extend(data);
114    }
115
116    Ok(bytes)
117}
118
119#[inline]
120fn map_local_to_calls(disk_sizes: &[usize], pos: ZipPosition, length: usize) -> Result<Vec<(ZipPosition, usize)>, CentralDirectoryReadError> {
121    let mut out = Vec::new();
122
123    let mut cur_offset = pos.offset;
124    let mut left = length;
125    for (i, size) in disk_sizes.iter().enumerate().skip(pos.disk) {
126        let from_this = std::cmp::min(size - cur_offset, left);
127        out.push((ZipPosition::new(i, cur_offset), from_this));
128
129        left -= from_this;
130        if left == 0 {
131            break;
132        }
133
134        cur_offset = 0;
135    }
136
137    if !out.is_empty() {
138        Ok(out)
139    } else {
140        Err(CentralDirectoryReadError::Map)
141    }
142}
143
144#[inline]
145fn map_global_to_calls(disk_sizes: &[usize], offset: usize, length: usize) -> Result<Vec<(ZipPosition, usize)>, CentralDirectoryReadError> {
146    let mut left = offset;
147    for (i, size) in disk_sizes.iter().enumerate() {
148        if left < *size {
149            return map_local_to_calls(disk_sizes, ZipPosition::new(i, left), length);
150        }
151
152        left -= *size;
153    }
154
155    Err(CentralDirectoryReadError::Map)
156}
157
158fn map_to_calls(disk_sizes: &[usize], pos: ZipPosition, length: usize, global: bool) -> Result<Vec<(ZipPosition, usize)>, CentralDirectoryReadError> {
159    if !global {
160        map_local_to_calls(disk_sizes, pos, length)
161    } else {
162        map_global_to_calls(disk_sizes, pos.offset, length)
163    }
164}