1use thiserror::Error;
2
3use crate::{decrypt::DecryptorCreationError, pipeline::{Pipeline, PipelineError}};
4
5use self::structures::{local_file_header::{LocalFileHeader, LFH_SIGNATURE, LFH_CONSTANT_SIZE}, DecompressorCreationError, central_directory::{CentralDirectoryFileHeader, SortedCentralDirectory}};
6
7pub mod structures;
9
10pub mod read_cd;
12
13#[derive(Debug, Error)]
14pub enum DecoderError {
15 #[error("file pipeline failed: {0}")]
16 Pipeline(#[from] PipelineError),
17
18 #[error("could not create decompressor: {0}")]
19 DecompressorInit(#[from] DecompressorCreationError),
20
21 #[error("could not create decryptor: {0}")]
22 DecryptorInit(#[from] DecryptorCreationError),
23
24 #[error("no password provided for encrypted file")]
25 NoPassword,
26
27 #[error("data exceeded archive size")]
28 ExtraData,
29
30 #[error("next header is at {0} but current position is {1}, one of the disk sizes is probably invalid")]
31 Overshoot(ZipPosition, ZipPosition),
32
33 #[error("could not find a file with position {0} in the central directory")]
34 InvalidOffset(ZipPosition),
35
36 #[error("file header has an invalid signature")]
37 InvalidSignature,
38
39 #[error("error within callback: {0}")]
40 FromDecodeCallback(#[from] anyhow::Error)
41}
42
43#[derive(Debug)]
44enum ZipDecoderState {
45 FileHeader,
46 FileData(u64, LocalFileHeader, Pipeline)
47}
48
/// A position inside a (possibly multi-disk) archive.
///
/// The derived ordering compares `disk` first and `offset` second, so the field
/// order below is significant.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Default)]
pub struct ZipPosition {
    /// Index of the disk.
    pub disk: usize,
    /// Byte offset from the start of that disk.
    pub offset: usize,
}
55
56impl std::fmt::Display for ZipPosition {
57 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58 write!(f, "{}:{}", self.disk, self.offset)
59 }
60}
61
62impl ZipPosition {
63 pub fn new(disk: usize, offset: usize) -> Self {
65 Self {
66 offset,
67 disk
68 }
69 }
70
71 pub fn from_offset(offset: usize) -> Self {
73 Self::new(0, offset)
74 }
75}
76
77#[derive(Debug)]
79pub enum ZipDecodedData<'a> {
80 FileHeader(&'a CentralDirectoryFileHeader, &'a LocalFileHeader),
82
83 FileData(&'a [u8])
85}
86
87pub struct ZipUnpacker<'a> {
89 decoder_state: ZipDecoderState,
90 current_index: usize,
91 current_position: ZipPosition,
92
93 disk_sizes: Vec<usize>,
94 central_directory: SortedCentralDirectory,
95
96 password: Option<Vec<u8>>,
97
98 #[allow(clippy::type_complexity)]
99 on_decode: Option<Box<dyn Fn(ZipDecodedData) -> anyhow::Result<()> + 'a>>
100}
101
102impl std::fmt::Debug for ZipUnpacker<'_> {
103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104 f.debug_struct("ZipUnpacker")
105 .field("decoder_state", &self.decoder_state)
106 .field("current_index", &self.current_index)
107 .field("current_position", &self.current_position)
108 .field("disk_sizes", &self.disk_sizes)
109 .finish()
110 }
111}
112
113impl<'a> ZipUnpacker<'a> {
114 pub fn new(central_directory: SortedCentralDirectory, disk_sizes: Vec<usize>) -> Self {
120 Self {
121 decoder_state: ZipDecoderState::FileHeader,
122 current_index: 0,
123 current_position: ZipPosition::default(),
124
125 disk_sizes,
126 central_directory,
127
128 password: None,
129
130 on_decode: None
131 }
132 }
133
134 pub fn new_with_password(central_directory: SortedCentralDirectory, disk_sizes: Vec<usize>, password: Vec<u8>) -> Self {
137 Self {
138 password: Some(password),
139 ..Self::new(central_directory, disk_sizes)
140 }
141 }
142
143 pub fn resume(central_directory: SortedCentralDirectory, disk_sizes: Vec<usize>, position: ZipPosition) -> Result<Self, DecoderError> {
151 let index = central_directory.headers_ref()
152 .binary_search_by(|h| h.header_position().cmp(&position))
153 .map_err(|_| DecoderError::InvalidOffset(position))?;
154
155 Ok(Self {
156 decoder_state: ZipDecoderState::FileHeader,
157 current_index: index,
158 current_position: position,
159
160 disk_sizes,
161 central_directory,
162
163 password: None,
164
165 on_decode: None
166 })
167 }
168
169 pub fn resume_with_password(central_directory: SortedCentralDirectory, disk_sizes: Vec<usize>, password: Vec<u8>, position: ZipPosition) -> Result<Self, DecoderError> {
172 Ok(Self {
173 password: Some(password),
174 ..(Self::resume(central_directory, disk_sizes, position)?)
175 })
176 }
177
178 pub fn set_callback(&mut self, on_decode: impl Fn(ZipDecodedData) -> anyhow::Result<()> + 'a) {
181 self.on_decode = Some(Box::new(on_decode));
182 }
183
184 pub fn update(&mut self, data: impl AsRef<[u8]>) -> Result<(usize, bool), DecoderError> {
193 let data = data.as_ref();
194
195 let mut buf_offset = 0;
196 loop {
197 let (advanced, reached_end) = self.update_internal(&data[buf_offset..])?;
198 buf_offset += advanced;
199
200 self.current_position.offset += advanced;
201 if self.current_position.offset >= self.disk_sizes[self.current_position.disk] {
202 let mut new_offset = self.current_position.offset;
204 let mut new_disk_number = None;
205 for d in (self.current_position.disk)..(self.disk_sizes.len() - 1) {
206 new_offset -= self.disk_sizes[d];
207 if new_offset < self.disk_sizes[d + 1] {
208 new_disk_number = Some(d + 1);
209 break;
210 }
211 }
212
213 let Some(new_disk_number) = new_disk_number else {
214 return Err(DecoderError::ExtraData);
215 };
216
217 self.current_position.offset = new_offset;
218 self.current_position.disk = new_disk_number;
219 }
220
221 if advanced == 0 || reached_end {
222 return Ok((buf_offset, reached_end));
223 }
224 }
225 }
226
227 fn update_internal(&mut self, data: impl AsRef<[u8]>) -> Result<(usize, bool), DecoderError> {
228 let headers = self.central_directory.headers_ref();
229 if self.current_index >= headers.len() {
230 return Ok((0, true));
231 }
232 let cdfh = &headers[self.current_index];
233
234 let data = data.as_ref();
235
236 match &mut self.decoder_state {
237 ZipDecoderState::FileHeader => {
238 if self.current_position > cdfh.header_position() {
239 return Err(DecoderError::Overshoot(cdfh.header_position(), self.current_position));
240 }
241
242 if self.current_position.disk < cdfh.disk_number as usize {
243 return Ok((std::cmp::min(self.disk_sizes[self.current_position.disk] - self.current_position.offset, data.len()), false));
245 }
246
247 if self.current_position.offset < cdfh.local_header_offset as usize {
248 return Ok((std::cmp::min(cdfh.local_header_offset as usize - self.current_position.offset, data.len()), false));
249 }
250
251 if data.len() < 4 + LFH_CONSTANT_SIZE {
252 return Ok((0, false));
253 }
254
255 let signature = u32::from_le_bytes(data[..4].try_into().unwrap());
256 if signature != LFH_SIGNATURE {
257 return Err(DecoderError::InvalidSignature);
258 }
259
260 let Some(lfh) = LocalFileHeader::from_bytes(&data[4..]) else {
261 return Ok((0, false));
262 };
263 let header_size = lfh.header_size;
264
265 if let Some(on_decode) = &self.on_decode {
266 (on_decode)(ZipDecodedData::FileHeader(cdfh, &lfh))?;
267 }
268
269 let decryptor = if lfh.is_encrypted() {
270 if let Some(password) = &self.password {
271 Some(lfh.create_decryptor(password)?)
272 } else {
273 return Err(DecoderError::NoPassword);
274 }
275 } else { None };
276
277 if lfh.uncompressed_size != 0 {
278 let decompressor = lfh.compression_method
279 .as_ref()
280 .map(|m| m.create_decompressor())
281 .transpose()?;
282
283 let pipeline = Pipeline::new(decryptor, decompressor);
284 self.decoder_state = ZipDecoderState::FileData(0, lfh, pipeline);
285 } else {
286 self.decoder_state = ZipDecoderState::FileHeader;
287 self.current_index += 1;
288 }
289
290 Ok((4 + header_size, false))
291 },
292
293 ZipDecoderState::FileData(pos, lfh, pipeline) => {
294 let bytes_left = lfh.compressed_size - *pos;
295 let bytes_to_read = std::cmp::min(bytes_left as usize, data.len());
296 let file_bytes = &data[..bytes_to_read];
297
298 let (count, data) = pipeline.update(file_bytes)?;
299 *pos += count as u64;
300
301 if let Some(on_decode) = &self.on_decode {
302 (on_decode)(ZipDecodedData::FileData(data))?;
303 }
304
305 if count as u64 == bytes_left {
306 self.decoder_state = ZipDecoderState::FileHeader;
307 self.current_index += 1;
308 }
309
310 Ok((count, false))
311 }
312 }
313 }
314}