1use thiserror::Error;
2
3use crate::decompress::{Decompressor, DecompressionError};
4
5use self::structures::{local_file_header::{LocalFileHeader, LFH_SIGNATURE, LFH_CONSTANT_SIZE}, DecompressorCreationError, central_directory::{CentralDirectoryFileHeader, SortedCentralDirectory}};
6
7pub mod structures;
9
10pub mod read_cd;
12
13#[derive(Debug, Error)]
14pub enum DecoderError {
15 #[error("failed to decompress: {0}")]
16 Decompression(#[from] DecompressionError),
17
18 #[error("could not create decompressor: {0}")]
19 DecompressorInit(#[from] DecompressorCreationError),
20
21 #[error("data exceeded archive size")]
22 ExtraData,
23
24 #[error("next header is at {0} but current position is {1}, one of the disk sizes is probably invalid")]
25 Overshoot(ZipPosition, ZipPosition),
26
27 #[error("could not find a file with position {0} in the central directory")]
28 InvalidOffset(ZipPosition),
29
30 #[error("file header has an invalid signature")]
31 InvalidSignature,
32
33 #[error("error within callback: {0}")]
34 FromDecodeCallback(#[from] anyhow::Error)
35}
36
37#[derive(Debug)]
38enum ZipDecoderState {
39 FileHeader,
40 FileData(u64, LocalFileHeader, Option<Box<dyn Decompressor>>)
41}
42
/// A location inside a (possibly multi-disk) archive.
///
/// The derived ordering compares `disk` first and `offset` second, so the
/// field order below is significant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct ZipPosition {
    /// Zero-based index of the disk.
    pub disk: usize,
    /// Byte offset from the start of that disk.
    pub offset: usize,
}
49
50impl std::fmt::Display for ZipPosition {
51 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52 write!(f, "{}:{}", self.disk, self.offset)
53 }
54}
55
56impl ZipPosition {
57 pub fn new(disk: usize, offset: usize) -> Self {
59 Self {
60 offset,
61 disk
62 }
63 }
64
65 pub fn from_offset(offset: usize) -> Self {
67 Self::new(0, offset)
68 }
69}
70
71#[derive(Debug)]
73pub enum ZipDecodedData<'a> {
74 FileHeader(&'a CentralDirectoryFileHeader, &'a LocalFileHeader),
76
77 FileData(&'a [u8])
79}
80
81pub struct ZipUnpacker<'a> {
83 decoder_state: ZipDecoderState,
84 current_index: usize,
85 current_position: ZipPosition,
86
87 disk_sizes: Vec<usize>,
88 central_directory: SortedCentralDirectory,
89
90 #[allow(clippy::type_complexity)]
91 on_decode: Option<Box<dyn Fn(ZipDecodedData) -> anyhow::Result<()> + 'a>>
92}
93
94impl std::fmt::Debug for ZipUnpacker<'_> {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 f.debug_struct("ZipUnpacker")
97 .field("decoder_state", &self.decoder_state)
98 .field("current_index", &self.current_index)
99 .field("current_position", &self.current_position)
100 .field("disk_sizes", &self.disk_sizes)
101 .finish()
102 }
103}
104
105impl<'a> ZipUnpacker<'a> {
106 pub fn new(central_directory: SortedCentralDirectory, disk_sizes: Vec<usize>) -> Self {
112 Self {
113 decoder_state: ZipDecoderState::FileHeader,
114 current_index: 0,
115 current_position: ZipPosition::default(),
116
117 disk_sizes,
118 central_directory,
119
120 on_decode: None
121 }
122 }
123
124 pub fn resume(central_directory: SortedCentralDirectory, disk_sizes: Vec<usize>, position: ZipPosition) -> Result<Self, DecoderError> {
132 let index = central_directory.headers_ref()
133 .binary_search_by(|h| h.header_position().cmp(&position))
134 .map_err(|_| DecoderError::InvalidOffset(position))?;
135
136 Ok(Self {
137 decoder_state: ZipDecoderState::FileHeader,
138 current_index: index,
139 current_position: position,
140
141 disk_sizes,
142 central_directory,
143
144 on_decode: None
145 })
146 }
147
148 pub fn set_callback(&mut self, on_decode: impl Fn(ZipDecodedData) -> anyhow::Result<()> + 'a) {
151 self.on_decode = Some(Box::new(on_decode));
152 }
153
154 pub fn update(&mut self, data: impl AsRef<[u8]>) -> Result<(usize, bool), DecoderError> {
163 let data = data.as_ref();
164
165 let mut buf_offset = 0;
166 loop {
167 let (advanced, reached_end) = self.update_internal(&data[buf_offset..])?;
168 buf_offset += advanced;
169
170 self.current_position.offset += advanced;
171 if self.current_position.offset >= self.disk_sizes[self.current_position.disk] {
172 let mut new_offset = self.current_position.offset;
174 let mut new_disk_number = None;
175 for d in (self.current_position.disk)..(self.disk_sizes.len() - 1) {
176 new_offset -= self.disk_sizes[d];
177 if new_offset < self.disk_sizes[d + 1] {
178 new_disk_number = Some(d + 1);
179 break;
180 }
181 }
182
183 let Some(new_disk_number) = new_disk_number else {
184 return Err(DecoderError::ExtraData);
185 };
186
187 self.current_position.offset = new_offset;
188 self.current_position.disk = new_disk_number;
189 }
190
191 if advanced == 0 || reached_end {
192 return Ok((buf_offset, reached_end));
193 }
194 }
195 }
196
197 fn update_internal(&mut self, data: impl AsRef<[u8]>) -> Result<(usize, bool), DecoderError> {
198 let headers = self.central_directory.headers_ref();
199 if self.current_index >= headers.len() {
200 return Ok((0, true));
201 }
202 let cdfh = &headers[self.current_index];
203
204 let data = data.as_ref();
205
206 match &mut self.decoder_state {
207 ZipDecoderState::FileHeader => {
208 if self.current_position > cdfh.header_position() {
209 return Err(DecoderError::Overshoot(cdfh.header_position(), self.current_position));
210 }
211
212 if self.current_position.disk < cdfh.disk_number as usize {
213 return Ok((std::cmp::min(self.disk_sizes[self.current_position.disk] - self.current_position.offset, data.len()), false));
215 }
216
217 if self.current_position.offset < cdfh.local_header_offset as usize {
218 return Ok((std::cmp::min(cdfh.local_header_offset as usize - self.current_position.offset, data.len()), false));
219 }
220
221 if data.len() < 4 + LFH_CONSTANT_SIZE {
222 return Ok((0, false));
223 }
224
225 let signature = u32::from_le_bytes(data[..4].try_into().unwrap());
226 if signature != LFH_SIGNATURE {
227 return Err(DecoderError::InvalidSignature);
228 }
229
230 let Some(lfh) = LocalFileHeader::from_bytes(&data[4..]) else {
231 return Ok((0, false));
232 };
233 let header_size = lfh.header_size;
234
235 if let Some(on_decode) = &self.on_decode {
236 (on_decode)(ZipDecodedData::FileHeader(cdfh, &lfh))?;
237 }
238
239 if lfh.uncompressed_size != 0 {
240 let decompressor = lfh.compression_method
241 .as_ref()
242 .map(|m| m.create_decompressor())
243 .transpose()?;
244
245 self.decoder_state = ZipDecoderState::FileData(0, lfh, decompressor);
246 } else {
247 self.decoder_state = ZipDecoderState::FileHeader;
248 self.current_index += 1;
249 }
250
251 Ok((4 + header_size, false))
252 },
253
254 ZipDecoderState::FileData(pos, lfh, decompressor) => {
255 let bytes_left = lfh.compressed_size - *pos;
256 let bytes_to_read = std::cmp::min(bytes_left as usize, data.len());
257 let file_bytes = &data[..bytes_to_read];
258
259 let (count, decompressed) = if let Some(decompressor) = decompressor {
260 decompressor.update(file_bytes)?
261 } else {
262 (bytes_to_read, file_bytes)
263 };
264 *pos += count as u64;
265
266 if let Some(on_decode) = &self.on_decode {
267 (on_decode)(ZipDecodedData::FileData(decompressed))?;
268 }
269
270 if count as u64 == bytes_left {
271 self.decoder_state = ZipDecoderState::FileHeader;
272 self.current_index += 1;
273 }
274
275 Ok((count, false))
276 }
277 }
278 }
279}