1use crate::error::{Result, SZipError};
7use flate2::read::DeflateDecoder;
8use std::fs::File;
9use std::io::{BufReader, Read, Seek, SeekFrom};
10use std::path::Path;
11
/// Marker that opens every local file header; stored little-endian it spells `PK\x03\x04`.
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x0403_4b50;

/// Marker that opens every central directory file header (`PK\x01\x02` on disk).
const CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0201_4b50;

/// Marker that opens the end-of-central-directory record (`PK\x05\x06` on disk).
const END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0605_4b50;

/// Marker that opens the ZIP64 end-of-central-directory record (`PK\x06\x06` on disk).
const ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0606_4b50;
23
/// Metadata describing a single archive member, as gathered from the central directory.
#[derive(Clone, Debug)]
pub struct ZipEntry {
    /// Member name; bytes that are not valid UTF-8 are replaced lossily when the directory is read.
    pub name: String,
    /// Number of payload bytes stored in the archive for this member.
    pub compressed_size: u64,
    /// Size recorded for the member once decompressed.
    pub uncompressed_size: u64,
    /// Raw compression method code; the reader handles `0` (stored) and `8` (deflate).
    pub compression_method: u16,
    /// Absolute file position of the member's local file header.
    pub offset: u64,
}
35
36pub struct StreamingZipReader {
38 file: BufReader<File>,
39 entries: Vec<ZipEntry>,
40}
41
42impl StreamingZipReader {
43 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
45 let mut file = BufReader::new(File::open(path)?);
46
47 let entries = Self::read_central_directory(&mut file)?;
49
50 Ok(StreamingZipReader { file, entries })
51 }
52
53 pub fn entries(&self) -> &[ZipEntry] {
55 &self.entries
56 }
57
58 pub fn find_entry(&self, name: &str) -> Option<&ZipEntry> {
60 self.entries.iter().find(|e| e.name == name)
61 }
62
63 pub fn read_entry(&mut self, entry: &ZipEntry) -> Result<Vec<u8>> {
65 self.file.seek(SeekFrom::Start(entry.offset))?;
67
68 let signature = self.read_u32_le()?;
70 if signature != LOCAL_FILE_HEADER_SIGNATURE {
71 return Err(SZipError::InvalidFormat(
72 "Invalid local file header signature".to_string(),
73 ));
74 }
75
76 self.file.seek(SeekFrom::Current(6))?;
78
79 self.file.seek(SeekFrom::Current(8))?;
81
82 self.file.seek(SeekFrom::Current(8))?;
84
85 let filename_len = self.read_u16_le()? as i64;
87 let extra_len = self.read_u16_le()? as i64;
88
89 self.file
91 .seek(SeekFrom::Current(filename_len + extra_len))?;
92
93 let mut compressed_data = vec![0u8; entry.compressed_size as usize];
95 self.file.read_exact(&mut compressed_data)?;
96
97 let data = if entry.compression_method == 8 {
99 let mut decoder = DeflateDecoder::new(&compressed_data[..]);
101 let mut decompressed = Vec::new();
102 decoder.read_to_end(&mut decompressed)?;
103 decompressed
104 } else if entry.compression_method == 0 {
105 compressed_data
107 } else {
108 return Err(SZipError::UnsupportedCompression(entry.compression_method));
109 };
110
111 Ok(data)
112 }
113
114 pub fn read_entry_by_name(&mut self, name: &str) -> Result<Vec<u8>> {
116 let entry = self
117 .find_entry(name)
118 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
119 .clone();
120
121 self.read_entry(&entry)
122 }
123
124 pub fn read_entry_streaming_by_name(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
127 let entry = self
128 .find_entry(name)
129 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
130 .clone();
131
132 self.read_entry_streaming(&entry)
133 }
134
135 pub fn read_entry_streaming(&mut self, entry: &ZipEntry) -> Result<Box<dyn Read + '_>> {
138 self.file.seek(SeekFrom::Start(entry.offset))?;
140
141 let signature = self.read_u32_le()?;
143 if signature != LOCAL_FILE_HEADER_SIGNATURE {
144 return Err(SZipError::InvalidFormat(
145 "Invalid local file header signature".to_string(),
146 ));
147 }
148
149 self.file.seek(SeekFrom::Current(6))?;
151
152 self.file.seek(SeekFrom::Current(8))?;
154
155 self.file.seek(SeekFrom::Current(8))?;
157
158 let filename_len = self.read_u16_le()? as i64;
160 let extra_len = self.read_u16_le()? as i64;
161
162 self.file
164 .seek(SeekFrom::Current(filename_len + extra_len))?;
165
166 let limited_reader = (&mut self.file).take(entry.compressed_size);
168
169 if entry.compression_method == 8 {
171 Ok(Box::new(DeflateDecoder::new(limited_reader)))
173 } else if entry.compression_method == 0 {
174 Ok(Box::new(limited_reader))
176 } else {
177 Err(SZipError::UnsupportedCompression(entry.compression_method))
178 }
179 }
180
181 pub fn read_entry_by_name_streaming(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
183 let entry = self
184 .find_entry(name)
185 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
186 .clone();
187
188 self.read_entry_streaming(&entry)
189 }
190
191 fn read_central_directory(file: &mut BufReader<File>) -> Result<Vec<ZipEntry>> {
193 let eocd_offset = Self::find_eocd(file)?;
195
196 file.seek(SeekFrom::Start(eocd_offset))?;
198
199 let signature = Self::read_u32_le_static(file)?;
201 if signature != END_OF_CENTRAL_DIRECTORY_SIGNATURE {
202 return Err(SZipError::InvalidFormat(format!(
203 "Invalid end of central directory signature: 0x{:08x}",
204 signature
205 )));
206 }
207
208 file.seek(SeekFrom::Current(4))?;
210
211 let _entries_on_disk = Self::read_u16_le_static(file)?;
213
214 let total_entries_16 = Self::read_u16_le_static(file)?;
218
219 let cd_size_32 = Self::read_u32_le_static(file)?;
221
222 let cd_offset_32 = Self::read_u32_le_static(file)? as u64;
224
225 let mut total_entries = total_entries_16 as usize;
227 let mut cd_offset = cd_offset_32;
228 let _cd_size = cd_size_32 as u64;
229
230 if total_entries_16 == 0xFFFF || cd_size_32 == 0xFFFFFFFF || cd_offset_32 == 0xFFFFFFFF {
231 let (zip64_total_entries, zip64_cd_size, zip64_cd_offset) =
233 Self::read_zip64_eocd(file, eocd_offset)?;
234 total_entries = zip64_total_entries as usize;
235 cd_offset = zip64_cd_offset;
236 let _ = zip64_cd_size;
238 }
239
240 file.seek(SeekFrom::Start(cd_offset))?;
242
243 let mut entries = Vec::with_capacity(total_entries);
245 for _ in 0..total_entries {
246 let signature = Self::read_u32_le_static(file)?;
247 if signature != CENTRAL_DIRECTORY_SIGNATURE {
248 break;
249 }
250
251 file.seek(SeekFrom::Current(6))?;
253
254 let compression_method = Self::read_u16_le_static(file)?;
255
256 file.seek(SeekFrom::Current(8))?;
258
259 let compressed_size_32 = Self::read_u32_le_static(file)? as u64;
261 let uncompressed_size_32 = Self::read_u32_le_static(file)? as u64;
262 let filename_len = Self::read_u16_le_static(file)? as usize;
263 let extra_len = Self::read_u16_le_static(file)? as usize;
264 let comment_len = Self::read_u16_le_static(file)? as usize;
265
266 file.seek(SeekFrom::Current(8))?;
268
269 let mut offset = Self::read_u32_le_static(file)? as u64;
270
271 let mut filename_buf = vec![0u8; filename_len];
273 file.read_exact(&mut filename_buf)?;
274 let name = String::from_utf8_lossy(&filename_buf).to_string();
275
276 let mut extra_buf = vec![0u8; extra_len];
278 if extra_len > 0 {
279 file.read_exact(&mut extra_buf)?;
280 }
281
282 let mut compressed_size = compressed_size_32;
284 let mut uncompressed_size = uncompressed_size_32;
285
286 if compressed_size_32 == 0xFFFFFFFF
287 || uncompressed_size_32 == 0xFFFFFFFF
288 || offset == 0xFFFFFFFF
289 {
290 let mut i = 0usize;
292 while i + 4 <= extra_buf.len() {
293 let id = u16::from_le_bytes([extra_buf[i], extra_buf[i + 1]]);
294 let data_len =
295 u16::from_le_bytes([extra_buf[i + 2], extra_buf[i + 3]]) as usize;
296 i += 4;
297 if i + data_len > extra_buf.len() {
298 break;
299 }
300 if id == 0x0001 {
301 let mut cursor = 0usize;
303 if uncompressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
305 uncompressed_size = u64::from_le_bytes([
306 extra_buf[i + cursor],
307 extra_buf[i + cursor + 1],
308 extra_buf[i + cursor + 2],
309 extra_buf[i + cursor + 3],
310 extra_buf[i + cursor + 4],
311 extra_buf[i + cursor + 5],
312 extra_buf[i + cursor + 6],
313 extra_buf[i + cursor + 7],
314 ]);
315 cursor += 8;
316 }
317 if compressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
319 compressed_size = u64::from_le_bytes([
320 extra_buf[i + cursor],
321 extra_buf[i + cursor + 1],
322 extra_buf[i + cursor + 2],
323 extra_buf[i + cursor + 3],
324 extra_buf[i + cursor + 4],
325 extra_buf[i + cursor + 5],
326 extra_buf[i + cursor + 6],
327 extra_buf[i + cursor + 7],
328 ]);
329 cursor += 8;
330 }
331 if offset == 0xFFFFFFFF && cursor + 8 <= data_len {
333 offset = u64::from_le_bytes([
334 extra_buf[i + cursor],
335 extra_buf[i + cursor + 1],
336 extra_buf[i + cursor + 2],
337 extra_buf[i + cursor + 3],
338 extra_buf[i + cursor + 4],
339 extra_buf[i + cursor + 5],
340 extra_buf[i + cursor + 6],
341 extra_buf[i + cursor + 7],
342 ]);
343 }
344 break;
346 }
347 i += data_len;
348 }
349 }
350
351 if comment_len > 0 {
353 file.seek(SeekFrom::Current(comment_len as i64))?;
354 }
355
356 entries.push(ZipEntry {
357 name,
358 compressed_size,
359 uncompressed_size,
360 compression_method,
361 offset,
362 });
363 }
364
365 Ok(entries)
366 }
367
368 fn read_zip64_eocd(file: &mut BufReader<File>, eocd_offset: u64) -> Result<(u64, u64, u64)> {
370 let search_start = eocd_offset.saturating_sub(65557);
372 file.seek(SeekFrom::Start(search_start))?;
373 let mut buffer = Vec::new();
374 file.read_to_end(&mut buffer)?;
375
376 let mut locator_pos: Option<usize> = None;
377 for i in (0..buffer.len().saturating_sub(3)).rev() {
378 if buffer[i] == 0x50
379 && buffer[i + 1] == 0x4b
380 && buffer[i + 2] == 0x06
381 && buffer[i + 3] == 0x07
382 {
383 locator_pos = Some(i);
384 break;
385 }
386 }
387
388 let locator_pos = locator_pos
389 .ok_or_else(|| SZipError::InvalidFormat("ZIP64 EOCD locator not found".to_string()))?;
390
391 let rel_off_bytes = &buffer[locator_pos + 8..locator_pos + 16];
394 let zip64_eocd_offset = u64::from_le_bytes([
395 rel_off_bytes[0],
396 rel_off_bytes[1],
397 rel_off_bytes[2],
398 rel_off_bytes[3],
399 rel_off_bytes[4],
400 rel_off_bytes[5],
401 rel_off_bytes[6],
402 rel_off_bytes[7],
403 ]);
404
405 file.seek(SeekFrom::Start(zip64_eocd_offset))?;
407
408 let sig = Self::read_u32_le_static(file)?;
409 if sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE {
410 return Err(SZipError::InvalidFormat(format!(
411 "Invalid ZIP64 EOCD signature: 0x{:08x}",
412 sig
413 )));
414 }
415
416 let _size = {
418 let mut buf = [0u8; 8];
419 file.read_exact(&mut buf)?;
420 u64::from_le_bytes(buf)
421 };
422
423 file.seek(SeekFrom::Current(12))?;
425
426 let total_entries = {
428 let mut buf = [0u8; 8];
429 file.read_exact(&mut buf)?;
430 u64::from_le_bytes(buf)
431 };
432
433 {
435 let mut buf = [0u8; 8];
436 file.read_exact(&mut buf)?;
437 }
439
440 let cd_size = {
442 let mut buf = [0u8; 8];
443 file.read_exact(&mut buf)?;
444 u64::from_le_bytes(buf)
445 };
446
447 let cd_offset = {
449 let mut buf = [0u8; 8];
450 file.read_exact(&mut buf)?;
451 u64::from_le_bytes(buf)
452 };
453
454 Ok((total_entries, cd_size, cd_offset))
455 }
456
457 fn find_eocd(file: &mut BufReader<File>) -> Result<u64> {
459 let file_size = file.seek(SeekFrom::End(0))?;
460
461 let search_start = file_size.saturating_sub(65557);
463 file.seek(SeekFrom::Start(search_start))?;
464
465 let mut buffer = Vec::new();
466 file.read_to_end(&mut buffer)?;
467
468 for i in (0..buffer.len().saturating_sub(3)).rev() {
470 if buffer[i] == 0x50
471 && buffer[i + 1] == 0x4b
472 && buffer[i + 2] == 0x05
473 && buffer[i + 3] == 0x06
474 {
475 return Ok(search_start + i as u64);
476 }
477 }
478
479 Err(SZipError::InvalidFormat(
480 "End of central directory not found".to_string(),
481 ))
482 }
483
484 fn read_u16_le(&mut self) -> Result<u16> {
485 let mut buf = [0u8; 2];
486 self.file.read_exact(&mut buf)?;
487 Ok(u16::from_le_bytes(buf))
488 }
489
490 fn read_u32_le(&mut self) -> Result<u32> {
491 let mut buf = [0u8; 4];
492 self.file.read_exact(&mut buf)?;
493 Ok(u32::from_le_bytes(buf))
494 }
495
496 fn read_u16_le_static(file: &mut BufReader<File>) -> Result<u16> {
497 let mut buf = [0u8; 2];
498 file.read_exact(&mut buf)?;
499 Ok(u16::from_le_bytes(buf))
500 }
501
502 fn read_u32_le_static(file: &mut BufReader<File>) -> Result<u32> {
503 let mut buf = [0u8; 4];
504 file.read_exact(&mut buf)?;
505 Ok(u32::from_le_bytes(buf))
506 }
507}