1use crate::error::{Result, SZipError};
7use flate2::read::DeflateDecoder;
8use std::fs::File;
9use std::io::{BufReader, Read, Seek, SeekFrom};
10use std::path::Path;
11
/// Little-endian signature at the start of a local file header (`PK\x03\x04` on disk).
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x0403_4b50;

/// Little-endian signature at the start of a central directory file header (`PK\x01\x02`).
const CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0201_4b50;

/// Little-endian signature of the end of central directory record (`PK\x05\x06`).
const END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0605_4b50;

/// Little-endian signature of the ZIP64 end of central directory record (`PK\x06\x06`).
const ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0606_4b50;
23
/// Metadata describing one archive member, as recorded in the central directory.
///
/// Entries compare by value, so two entries are equal when every field matches.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZipEntry {
    /// Entry name as stored in the archive.
    pub name: String,
    /// Number of bytes the entry's data occupies inside the archive.
    pub compressed_size: u64,
    /// Number of bytes the entry's data occupies once decompressed.
    pub uncompressed_size: u64,
    /// Raw ZIP compression method identifier (e.g. 0 = stored, 8 = deflate).
    pub compression_method: u16,
    /// Byte offset of the entry's local file header from the start of the file.
    pub offset: u64,
}
35
36pub struct StreamingZipReader {
38 file: BufReader<File>,
39 entries: Vec<ZipEntry>,
40}
41
42impl StreamingZipReader {
43 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
45 Self::open_with_buffer_size(path, None)
46 }
47
48 pub fn open_with_buffer_size<P: AsRef<Path>>(
68 path: P,
69 buffer_size: Option<usize>,
70 ) -> Result<Self> {
71 let file = File::open(path)?;
72
73 let buf_size = buffer_size.unwrap_or(512 * 1024); let mut file = BufReader::with_capacity(buf_size, file);
76
77 let entries = Self::read_central_directory(&mut file)?;
79
80 Ok(StreamingZipReader { file, entries })
81 }
82
83 pub fn entries(&self) -> &[ZipEntry] {
85 &self.entries
86 }
87
88 pub fn find_entry(&self, name: &str) -> Option<&ZipEntry> {
90 self.entries.iter().find(|e| e.name == name)
91 }
92
93 pub fn read_entry(&mut self, entry: &ZipEntry) -> Result<Vec<u8>> {
95 self.file.seek(SeekFrom::Start(entry.offset))?;
97
98 let signature = self.read_u32_le()?;
100 if signature != LOCAL_FILE_HEADER_SIGNATURE {
101 return Err(SZipError::InvalidFormat(
102 "Invalid local file header signature".to_string(),
103 ));
104 }
105
106 self.file.seek(SeekFrom::Current(6))?;
108
109 self.file.seek(SeekFrom::Current(8))?;
111
112 self.file.seek(SeekFrom::Current(8))?;
114
115 let filename_len = self.read_u16_le()? as i64;
117 let extra_len = self.read_u16_le()? as i64;
118
119 self.file
121 .seek(SeekFrom::Current(filename_len + extra_len))?;
122
123 let mut compressed_data = vec![0u8; entry.compressed_size as usize];
125 self.file.read_exact(&mut compressed_data)?;
126
127 let data = if entry.compression_method == 8 {
129 let mut decoder = DeflateDecoder::new(&compressed_data[..]);
131 let mut decompressed = Vec::new();
132 decoder.read_to_end(&mut decompressed)?;
133 decompressed
134 } else if entry.compression_method == 0 {
135 compressed_data
137 } else if entry.compression_method == 93 {
138 #[cfg(feature = "zstd-support")]
140 {
141 zstd::decode_all(&compressed_data[..])?
142 }
143 #[cfg(not(feature = "zstd-support"))]
144 {
145 return Err(SZipError::UnsupportedCompression(entry.compression_method));
146 }
147 } else {
148 return Err(SZipError::UnsupportedCompression(entry.compression_method));
149 };
150
151 Ok(data)
152 }
153
154 pub fn read_entry_by_name(&mut self, name: &str) -> Result<Vec<u8>> {
156 let entry = self
157 .find_entry(name)
158 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
159 .clone();
160
161 self.read_entry(&entry)
162 }
163
164 pub fn read_entry_streaming_by_name(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
167 let entry = self
168 .find_entry(name)
169 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
170 .clone();
171
172 self.read_entry_streaming(&entry)
173 }
174
175 pub fn read_entry_streaming(&mut self, entry: &ZipEntry) -> Result<Box<dyn Read + '_>> {
178 self.file.seek(SeekFrom::Start(entry.offset))?;
180
181 let signature = self.read_u32_le()?;
183 if signature != LOCAL_FILE_HEADER_SIGNATURE {
184 return Err(SZipError::InvalidFormat(
185 "Invalid local file header signature".to_string(),
186 ));
187 }
188
189 self.file.seek(SeekFrom::Current(6))?;
191
192 self.file.seek(SeekFrom::Current(8))?;
194
195 self.file.seek(SeekFrom::Current(8))?;
197
198 let filename_len = self.read_u16_le()? as i64;
200 let extra_len = self.read_u16_le()? as i64;
201
202 self.file
204 .seek(SeekFrom::Current(filename_len + extra_len))?;
205
206 let limited_reader = (&mut self.file).take(entry.compressed_size);
208
209 if entry.compression_method == 8 {
211 Ok(Box::new(DeflateDecoder::new(limited_reader)))
213 } else if entry.compression_method == 0 {
214 Ok(Box::new(limited_reader))
216 } else if entry.compression_method == 93 {
217 #[cfg(feature = "zstd-support")]
219 {
220 Ok(Box::new(zstd::Decoder::new(limited_reader)?))
221 }
222 #[cfg(not(feature = "zstd-support"))]
223 {
224 Err(SZipError::UnsupportedCompression(entry.compression_method))
225 }
226 } else {
227 Err(SZipError::UnsupportedCompression(entry.compression_method))
228 }
229 }
230
231 pub fn read_entry_by_name_streaming(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
233 let entry = self
234 .find_entry(name)
235 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
236 .clone();
237
238 self.read_entry_streaming(&entry)
239 }
240
241 fn read_central_directory(file: &mut BufReader<File>) -> Result<Vec<ZipEntry>> {
243 let eocd_offset = Self::find_eocd(file)?;
245
246 file.seek(SeekFrom::Start(eocd_offset))?;
248
249 let signature = Self::read_u32_le_static(file)?;
251 if signature != END_OF_CENTRAL_DIRECTORY_SIGNATURE {
252 return Err(SZipError::InvalidFormat(format!(
253 "Invalid end of central directory signature: 0x{:08x}",
254 signature
255 )));
256 }
257
258 file.seek(SeekFrom::Current(4))?;
260
261 let _entries_on_disk = Self::read_u16_le_static(file)?;
263
264 let total_entries_16 = Self::read_u16_le_static(file)?;
268
269 let cd_size_32 = Self::read_u32_le_static(file)?;
271
272 let cd_offset_32 = Self::read_u32_le_static(file)? as u64;
274
275 let mut total_entries = total_entries_16 as usize;
277 let mut cd_offset = cd_offset_32;
278 let _cd_size = cd_size_32 as u64;
279
280 if total_entries_16 == 0xFFFF || cd_size_32 == 0xFFFFFFFF || cd_offset_32 == 0xFFFFFFFF {
281 let (zip64_total_entries, zip64_cd_size, zip64_cd_offset) =
283 Self::read_zip64_eocd(file, eocd_offset)?;
284 total_entries = zip64_total_entries as usize;
285 cd_offset = zip64_cd_offset;
286 let _ = zip64_cd_size;
288 }
289
290 file.seek(SeekFrom::Start(cd_offset))?;
292
293 let mut entries = Vec::with_capacity(total_entries);
295 for _ in 0..total_entries {
296 let signature = Self::read_u32_le_static(file)?;
297 if signature != CENTRAL_DIRECTORY_SIGNATURE {
298 break;
299 }
300
301 file.seek(SeekFrom::Current(6))?;
303
304 let compression_method = Self::read_u16_le_static(file)?;
305
306 file.seek(SeekFrom::Current(8))?;
308
309 let compressed_size_32 = Self::read_u32_le_static(file)? as u64;
311 let uncompressed_size_32 = Self::read_u32_le_static(file)? as u64;
312 let filename_len = Self::read_u16_le_static(file)? as usize;
313 let extra_len = Self::read_u16_le_static(file)? as usize;
314 let comment_len = Self::read_u16_le_static(file)? as usize;
315
316 file.seek(SeekFrom::Current(8))?;
318
319 let mut offset = Self::read_u32_le_static(file)? as u64;
320
321 let mut filename_buf = vec![0u8; filename_len];
323 file.read_exact(&mut filename_buf)?;
324 let name = String::from_utf8_lossy(&filename_buf).to_string();
325
326 let mut extra_buf = vec![0u8; extra_len];
328 if extra_len > 0 {
329 file.read_exact(&mut extra_buf)?;
330 }
331
332 let mut compressed_size = compressed_size_32;
334 let mut uncompressed_size = uncompressed_size_32;
335
336 if compressed_size_32 == 0xFFFFFFFF
337 || uncompressed_size_32 == 0xFFFFFFFF
338 || offset == 0xFFFFFFFF
339 {
340 let mut i = 0usize;
342 while i + 4 <= extra_buf.len() {
343 let id = u16::from_le_bytes([extra_buf[i], extra_buf[i + 1]]);
344 let data_len =
345 u16::from_le_bytes([extra_buf[i + 2], extra_buf[i + 3]]) as usize;
346 i += 4;
347 if i + data_len > extra_buf.len() {
348 break;
349 }
350 if id == 0x0001 {
351 let mut cursor = 0usize;
353 if uncompressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
355 uncompressed_size = u64::from_le_bytes([
356 extra_buf[i + cursor],
357 extra_buf[i + cursor + 1],
358 extra_buf[i + cursor + 2],
359 extra_buf[i + cursor + 3],
360 extra_buf[i + cursor + 4],
361 extra_buf[i + cursor + 5],
362 extra_buf[i + cursor + 6],
363 extra_buf[i + cursor + 7],
364 ]);
365 cursor += 8;
366 }
367 if compressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
369 compressed_size = u64::from_le_bytes([
370 extra_buf[i + cursor],
371 extra_buf[i + cursor + 1],
372 extra_buf[i + cursor + 2],
373 extra_buf[i + cursor + 3],
374 extra_buf[i + cursor + 4],
375 extra_buf[i + cursor + 5],
376 extra_buf[i + cursor + 6],
377 extra_buf[i + cursor + 7],
378 ]);
379 cursor += 8;
380 }
381 if offset == 0xFFFFFFFF && cursor + 8 <= data_len {
383 offset = u64::from_le_bytes([
384 extra_buf[i + cursor],
385 extra_buf[i + cursor + 1],
386 extra_buf[i + cursor + 2],
387 extra_buf[i + cursor + 3],
388 extra_buf[i + cursor + 4],
389 extra_buf[i + cursor + 5],
390 extra_buf[i + cursor + 6],
391 extra_buf[i + cursor + 7],
392 ]);
393 }
394 break;
396 }
397 i += data_len;
398 }
399 }
400
401 if comment_len > 0 {
403 file.seek(SeekFrom::Current(comment_len as i64))?;
404 }
405
406 entries.push(ZipEntry {
407 name,
408 compressed_size,
409 uncompressed_size,
410 compression_method,
411 offset,
412 });
413 }
414
415 Ok(entries)
416 }
417
418 fn read_zip64_eocd(file: &mut BufReader<File>, eocd_offset: u64) -> Result<(u64, u64, u64)> {
420 let search_start = eocd_offset.saturating_sub(65557);
422 file.seek(SeekFrom::Start(search_start))?;
423 let mut buffer = Vec::new();
424 file.read_to_end(&mut buffer)?;
425
426 let mut locator_pos: Option<usize> = None;
427 for i in (0..buffer.len().saturating_sub(3)).rev() {
428 if buffer[i] == 0x50
429 && buffer[i + 1] == 0x4b
430 && buffer[i + 2] == 0x06
431 && buffer[i + 3] == 0x07
432 {
433 locator_pos = Some(i);
434 break;
435 }
436 }
437
438 let locator_pos = locator_pos
439 .ok_or_else(|| SZipError::InvalidFormat("ZIP64 EOCD locator not found".to_string()))?;
440
441 let rel_off_bytes = &buffer[locator_pos + 8..locator_pos + 16];
444 let zip64_eocd_offset = u64::from_le_bytes([
445 rel_off_bytes[0],
446 rel_off_bytes[1],
447 rel_off_bytes[2],
448 rel_off_bytes[3],
449 rel_off_bytes[4],
450 rel_off_bytes[5],
451 rel_off_bytes[6],
452 rel_off_bytes[7],
453 ]);
454
455 file.seek(SeekFrom::Start(zip64_eocd_offset))?;
457
458 let sig = Self::read_u32_le_static(file)?;
459 if sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE {
460 return Err(SZipError::InvalidFormat(format!(
461 "Invalid ZIP64 EOCD signature: 0x{:08x}",
462 sig
463 )));
464 }
465
466 let _size = {
468 let mut buf = [0u8; 8];
469 file.read_exact(&mut buf)?;
470 u64::from_le_bytes(buf)
471 };
472
473 file.seek(SeekFrom::Current(12))?;
475
476 let total_entries = {
478 let mut buf = [0u8; 8];
479 file.read_exact(&mut buf)?;
480 u64::from_le_bytes(buf)
481 };
482
483 {
485 let mut buf = [0u8; 8];
486 file.read_exact(&mut buf)?;
487 }
489
490 let cd_size = {
492 let mut buf = [0u8; 8];
493 file.read_exact(&mut buf)?;
494 u64::from_le_bytes(buf)
495 };
496
497 let cd_offset = {
499 let mut buf = [0u8; 8];
500 file.read_exact(&mut buf)?;
501 u64::from_le_bytes(buf)
502 };
503
504 Ok((total_entries, cd_size, cd_offset))
505 }
506
507 fn find_eocd(file: &mut BufReader<File>) -> Result<u64> {
509 let file_size = file.seek(SeekFrom::End(0))?;
510
511 let search_start = file_size.saturating_sub(65557);
513 file.seek(SeekFrom::Start(search_start))?;
514
515 let mut buffer = Vec::new();
516 file.read_to_end(&mut buffer)?;
517
518 for i in (0..buffer.len().saturating_sub(3)).rev() {
520 if buffer[i] == 0x50
521 && buffer[i + 1] == 0x4b
522 && buffer[i + 2] == 0x05
523 && buffer[i + 3] == 0x06
524 {
525 return Ok(search_start + i as u64);
526 }
527 }
528
529 Err(SZipError::InvalidFormat(
530 "End of central directory not found".to_string(),
531 ))
532 }
533
534 fn read_u16_le(&mut self) -> Result<u16> {
535 let mut buf = [0u8; 2];
536 self.file.read_exact(&mut buf)?;
537 Ok(u16::from_le_bytes(buf))
538 }
539
540 fn read_u32_le(&mut self) -> Result<u32> {
541 let mut buf = [0u8; 4];
542 self.file.read_exact(&mut buf)?;
543 Ok(u32::from_le_bytes(buf))
544 }
545
546 fn read_u16_le_static(file: &mut BufReader<File>) -> Result<u16> {
547 let mut buf = [0u8; 2];
548 file.read_exact(&mut buf)?;
549 Ok(u16::from_le_bytes(buf))
550 }
551
552 fn read_u32_le_static(file: &mut BufReader<File>) -> Result<u32> {
553 let mut buf = [0u8; 4];
554 file.read_exact(&mut buf)?;
555 Ok(u32::from_le_bytes(buf))
556 }
557}