1use crate::error::{Result, SZipError};
7use flate2::read::DeflateDecoder;
8use std::fs::File;
9use std::io::{BufReader, Read, Seek, SeekFrom};
10use std::path::Path;
11
12#[cfg(feature = "encryption")]
13use crate::encryption::{AesDecryptor, AesStrength};
14
/// Marker expected at the start of a local file header: the bytes `PK\x03\x04` read as a
/// little-endian `u32`.
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x0403_4b50;

/// Marker expected at the start of each central directory record (`PK\x01\x02`).
const CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0201_4b50;

/// Marker expected at the start of the end of central directory record (`PK\x05\x06`).
const END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0605_4b50;

/// Marker expected at the start of the ZIP64 end of central directory record (`PK\x06\x06`).
const ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0606_4b50;
26
/// Metadata describing one member of a ZIP archive, as parsed from the central directory.
///
/// Entries are plain values: they can be cloned, printed and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZipEntry {
    /// Entry name as stored in the archive (invalid UTF-8 is replaced lossily when parsed).
    pub name: String,
    /// Number of bytes the entry occupies in the archive; taken from the ZIP64 extra field
    /// when the 32-bit header field holds the `0xFFFFFFFF` placeholder.
    pub compressed_size: u64,
    /// Size recorded for the decoded data, with the same ZIP64 handling as `compressed_size`.
    pub uncompressed_size: u64,
    /// Compression method code from the central directory. The reader decodes `0` (stored),
    /// `8` (deflate) and, with the `zstd-support` feature, `93` (zstd).
    pub compression_method: u16,
    /// Absolute file offset of the entry's local file header.
    pub offset: u64,
    /// Whether bit 0 of the general-purpose flags is set, i.e. the entry is encrypted.
    #[cfg(feature = "encryption")]
    pub is_encrypted: bool,
}
40
41pub struct StreamingZipReader {
43 file: BufReader<File>,
44 entries: Vec<ZipEntry>,
45 #[cfg(feature = "encryption")]
46 password: Option<String>,
47}
48
49impl StreamingZipReader {
50 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
52 Self::open_with_buffer_size(path, None)
53 }
54
55 pub fn open_with_buffer_size<P: AsRef<Path>>(
75 path: P,
76 buffer_size: Option<usize>,
77 ) -> Result<Self> {
78 let file = File::open(path)?;
79
80 let buf_size = buffer_size.unwrap_or(512 * 1024); let mut file = BufReader::with_capacity(buf_size, file);
83
84 let entries = Self::read_central_directory(&mut file)?;
86
87 Ok(StreamingZipReader {
88 file,
89 entries,
90 #[cfg(feature = "encryption")]
91 password: None,
92 })
93 }
94
/// Stores the password used to decrypt encrypted entries, replacing any previous one.
///
/// Returns `self` so calls can be chained.
#[cfg(feature = "encryption")]
pub fn set_password(&mut self, password: impl Into<String>) -> &mut Self {
    let secret: String = password.into();
    self.password = Some(secret);
    self
}
101
/// Forgets any previously stored password.
///
/// Returns `self` so calls can be chained.
#[cfg(feature = "encryption")]
pub fn clear_password(&mut self) -> &mut Self {
    let _previous = self.password.take();
    self
}
108
109 pub fn entries(&self) -> &[ZipEntry] {
111 &self.entries
112 }
113
114 pub fn find_entry(&self, name: &str) -> Option<&ZipEntry> {
116 self.entries.iter().find(|e| e.name == name)
117 }
118
119 pub fn read_entry(&mut self, entry: &ZipEntry) -> Result<Vec<u8>> {
121 self.file.seek(SeekFrom::Start(entry.offset))?;
123
124 let signature = self.read_u32_le()?;
126 if signature != LOCAL_FILE_HEADER_SIGNATURE {
127 return Err(SZipError::InvalidFormat(
128 "Invalid local file header signature".to_string(),
129 ));
130 }
131
132 self.file.seek(SeekFrom::Current(2))?;
134
135 let flags = self.read_u16_le()?;
137 let is_encrypted = (flags & 0x01) != 0;
138
139 let _compression_method = self.read_u16_le()?;
141
142 self.file.seek(SeekFrom::Current(8))?;
144
145 self.file.seek(SeekFrom::Current(8))?;
147
148 let filename_len = self.read_u16_le()? as i64;
150 let extra_len = self.read_u16_le()? as usize;
151
152 self.file.seek(SeekFrom::Current(filename_len))?;
154
155 #[cfg(feature = "encryption")]
157 let encryption_info = if is_encrypted {
158 eprintln!(
159 "DEBUG: File position before parse_aes_extra_field: 0x{:x}",
160 self.file.stream_position()?
161 );
162 eprintln!("DEBUG: extra_len = {}", extra_len);
163 self.parse_aes_extra_field(extra_len)?
164 } else {
165 self.file.seek(SeekFrom::Current(extra_len as i64))?;
167 None
168 };
169
170 #[cfg(not(feature = "encryption"))]
171 {
172 if is_encrypted {
173 return Err(SZipError::InvalidFormat(
174 "Encrypted entry found but encryption feature not enabled".to_string(),
175 ));
176 }
177 self.file.seek(SeekFrom::Current(extra_len as i64))?;
179 }
180
181 #[cfg(feature = "encryption")]
183 let data_size = if let Some((strength, _, _)) = encryption_info {
184 entry
186 .compressed_size
187 .saturating_sub((strength.salt_size() + 2 + 10) as u64)
188 } else {
189 entry.compressed_size
190 };
191
192 #[cfg(not(feature = "encryption"))]
193 let data_size = entry.compressed_size;
194
195 let mut compressed_data = vec![0u8; data_size as usize];
197 self.file.read_exact(&mut compressed_data)?;
198
199 #[cfg(feature = "encryption")]
201 let auth_code = if encryption_info.is_some() {
202 let mut ac = vec![0u8; 10];
203 self.file.read_exact(&mut ac)?;
204 Some(ac)
205 } else {
206 None
207 };
208
209 #[cfg(feature = "encryption")]
211 let decryptor_opt = if let Some((strength, salt, pw_verify)) = encryption_info {
212 let password = self.password.as_ref().ok_or_else(|| {
213 SZipError::InvalidFormat("Encrypted entry but no password set".to_string())
214 })?;
215
216 let mut decryptor = AesDecryptor::new(password, strength, &salt, &pw_verify)?;
218
219 decryptor.decrypt(&mut compressed_data)?;
221
222 Some(decryptor)
223 } else {
224 None
225 };
226
227 let data = if entry.compression_method == 8 {
229 let mut decoder = DeflateDecoder::new(&compressed_data[..]);
231 let mut decompressed = Vec::new();
232 decoder.read_to_end(&mut decompressed)?;
233 decompressed
234 } else if entry.compression_method == 0 {
235 compressed_data
237 } else if entry.compression_method == 93 {
238 #[cfg(feature = "zstd-support")]
240 {
241 zstd::decode_all(&compressed_data[..])?
242 }
243 #[cfg(not(feature = "zstd-support"))]
244 {
245 return Err(SZipError::UnsupportedCompression(entry.compression_method));
246 }
247 } else {
248 return Err(SZipError::UnsupportedCompression(entry.compression_method));
249 };
250
251 #[cfg(feature = "encryption")]
253 if let Some(mut decryptor) = decryptor_opt {
254 decryptor.update_hmac(&data);
256
257 if let Some(ac) = auth_code {
259 decryptor.verify_auth_code(&ac)?;
260 }
261 }
262
263 Ok(data)
264 }
265
266 pub fn read_entry_by_name(&mut self, name: &str) -> Result<Vec<u8>> {
268 let entry = self
269 .find_entry(name)
270 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
271 .clone();
272
273 self.read_entry(&entry)
274 }
275
276 pub fn read_entry_streaming_by_name(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
279 let entry = self
280 .find_entry(name)
281 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
282 .clone();
283
284 self.read_entry_streaming(&entry)
285 }
286
287 pub fn read_entry_streaming(&mut self, entry: &ZipEntry) -> Result<Box<dyn Read + '_>> {
290 self.file.seek(SeekFrom::Start(entry.offset))?;
292
293 let signature = self.read_u32_le()?;
295 if signature != LOCAL_FILE_HEADER_SIGNATURE {
296 return Err(SZipError::InvalidFormat(
297 "Invalid local file header signature".to_string(),
298 ));
299 }
300
301 self.file.seek(SeekFrom::Current(6))?;
303
304 self.file.seek(SeekFrom::Current(8))?;
306
307 self.file.seek(SeekFrom::Current(8))?;
309
310 let filename_len = self.read_u16_le()? as i64;
312 let extra_len = self.read_u16_le()? as i64;
313
314 self.file
316 .seek(SeekFrom::Current(filename_len + extra_len))?;
317
318 let limited_reader = (&mut self.file).take(entry.compressed_size);
320
321 if entry.compression_method == 8 {
323 Ok(Box::new(DeflateDecoder::new(limited_reader)))
325 } else if entry.compression_method == 0 {
326 Ok(Box::new(limited_reader))
328 } else if entry.compression_method == 93 {
329 #[cfg(feature = "zstd-support")]
331 {
332 Ok(Box::new(zstd::Decoder::new(limited_reader)?))
333 }
334 #[cfg(not(feature = "zstd-support"))]
335 {
336 Err(SZipError::UnsupportedCompression(entry.compression_method))
337 }
338 } else {
339 Err(SZipError::UnsupportedCompression(entry.compression_method))
340 }
341 }
342
343 pub fn read_entry_by_name_streaming(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
345 let entry = self
346 .find_entry(name)
347 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
348 .clone();
349
350 self.read_entry_streaming(&entry)
351 }
352
353 fn read_central_directory(file: &mut BufReader<File>) -> Result<Vec<ZipEntry>> {
355 let eocd_offset = Self::find_eocd(file)?;
357
358 file.seek(SeekFrom::Start(eocd_offset))?;
360
361 let signature = Self::read_u32_le_static(file)?;
363 if signature != END_OF_CENTRAL_DIRECTORY_SIGNATURE {
364 return Err(SZipError::InvalidFormat(format!(
365 "Invalid end of central directory signature: 0x{:08x}",
366 signature
367 )));
368 }
369
370 file.seek(SeekFrom::Current(4))?;
372
373 let _entries_on_disk = Self::read_u16_le_static(file)?;
375
376 let total_entries_16 = Self::read_u16_le_static(file)?;
380
381 let cd_size_32 = Self::read_u32_le_static(file)?;
383
384 let cd_offset_32 = Self::read_u32_le_static(file)? as u64;
386
387 let mut total_entries = total_entries_16 as usize;
389 let mut cd_offset = cd_offset_32;
390 let _cd_size = cd_size_32 as u64;
391
392 if total_entries_16 == 0xFFFF || cd_size_32 == 0xFFFFFFFF || cd_offset_32 == 0xFFFFFFFF {
393 let (zip64_total_entries, zip64_cd_size, zip64_cd_offset) =
395 Self::read_zip64_eocd(file, eocd_offset)?;
396 total_entries = zip64_total_entries as usize;
397 cd_offset = zip64_cd_offset;
398 let _ = zip64_cd_size;
400 }
401
402 file.seek(SeekFrom::Start(cd_offset))?;
404
405 let mut entries = Vec::with_capacity(total_entries);
407 for _ in 0..total_entries {
408 let signature = Self::read_u32_le_static(file)?;
409 if signature != CENTRAL_DIRECTORY_SIGNATURE {
410 break;
411 }
412
413 file.seek(SeekFrom::Current(4))?;
415
416 #[cfg_attr(not(feature = "encryption"), allow(unused_variables))]
418 let flags = Self::read_u16_le_static(file)?;
419
420 let compression_method = Self::read_u16_le_static(file)?;
421
422 file.seek(SeekFrom::Current(8))?;
424
425 let compressed_size_32 = Self::read_u32_le_static(file)? as u64;
427 let uncompressed_size_32 = Self::read_u32_le_static(file)? as u64;
428 let filename_len = Self::read_u16_le_static(file)? as usize;
429 let extra_len = Self::read_u16_le_static(file)? as usize;
430 let comment_len = Self::read_u16_le_static(file)? as usize;
431
432 file.seek(SeekFrom::Current(8))?;
434
435 let mut offset = Self::read_u32_le_static(file)? as u64;
436
437 let mut filename_buf = vec![0u8; filename_len];
439 file.read_exact(&mut filename_buf)?;
440 let name = String::from_utf8_lossy(&filename_buf).to_string();
441
442 let mut extra_buf = vec![0u8; extra_len];
444 if extra_len > 0 {
445 file.read_exact(&mut extra_buf)?;
446 }
447
448 let mut compressed_size = compressed_size_32;
450 let mut uncompressed_size = uncompressed_size_32;
451
452 if compressed_size_32 == 0xFFFFFFFF
453 || uncompressed_size_32 == 0xFFFFFFFF
454 || offset == 0xFFFFFFFF
455 {
456 let mut i = 0usize;
458 while i + 4 <= extra_buf.len() {
459 let id = u16::from_le_bytes([extra_buf[i], extra_buf[i + 1]]);
460 let data_len =
461 u16::from_le_bytes([extra_buf[i + 2], extra_buf[i + 3]]) as usize;
462 i += 4;
463 if i + data_len > extra_buf.len() {
464 break;
465 }
466 if id == 0x0001 {
467 let mut cursor = 0usize;
469 if uncompressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
471 uncompressed_size = u64::from_le_bytes([
472 extra_buf[i + cursor],
473 extra_buf[i + cursor + 1],
474 extra_buf[i + cursor + 2],
475 extra_buf[i + cursor + 3],
476 extra_buf[i + cursor + 4],
477 extra_buf[i + cursor + 5],
478 extra_buf[i + cursor + 6],
479 extra_buf[i + cursor + 7],
480 ]);
481 cursor += 8;
482 }
483 if compressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
485 compressed_size = u64::from_le_bytes([
486 extra_buf[i + cursor],
487 extra_buf[i + cursor + 1],
488 extra_buf[i + cursor + 2],
489 extra_buf[i + cursor + 3],
490 extra_buf[i + cursor + 4],
491 extra_buf[i + cursor + 5],
492 extra_buf[i + cursor + 6],
493 extra_buf[i + cursor + 7],
494 ]);
495 cursor += 8;
496 }
497 if offset == 0xFFFFFFFF && cursor + 8 <= data_len {
499 offset = u64::from_le_bytes([
500 extra_buf[i + cursor],
501 extra_buf[i + cursor + 1],
502 extra_buf[i + cursor + 2],
503 extra_buf[i + cursor + 3],
504 extra_buf[i + cursor + 4],
505 extra_buf[i + cursor + 5],
506 extra_buf[i + cursor + 6],
507 extra_buf[i + cursor + 7],
508 ]);
509 }
510 break;
512 }
513 i += data_len;
514 }
515 }
516
517 if comment_len > 0 {
519 file.seek(SeekFrom::Current(comment_len as i64))?;
520 }
521
522 entries.push(ZipEntry {
523 name,
524 compressed_size,
525 uncompressed_size,
526 compression_method,
527 offset,
528 #[cfg(feature = "encryption")]
529 is_encrypted: (flags & 0x01) != 0,
530 });
531 }
532
533 Ok(entries)
534 }
535
536 fn read_zip64_eocd(file: &mut BufReader<File>, eocd_offset: u64) -> Result<(u64, u64, u64)> {
538 let search_start = eocd_offset.saturating_sub(65557);
540 file.seek(SeekFrom::Start(search_start))?;
541 let mut buffer = Vec::new();
542 file.read_to_end(&mut buffer)?;
543
544 let mut locator_pos: Option<usize> = None;
545 for i in (0..buffer.len().saturating_sub(3)).rev() {
546 if buffer[i] == 0x50
547 && buffer[i + 1] == 0x4b
548 && buffer[i + 2] == 0x06
549 && buffer[i + 3] == 0x07
550 {
551 locator_pos = Some(i);
552 break;
553 }
554 }
555
556 let locator_pos = locator_pos
557 .ok_or_else(|| SZipError::InvalidFormat("ZIP64 EOCD locator not found".to_string()))?;
558
559 let rel_off_bytes = &buffer[locator_pos + 8..locator_pos + 16];
562 let zip64_eocd_offset = u64::from_le_bytes([
563 rel_off_bytes[0],
564 rel_off_bytes[1],
565 rel_off_bytes[2],
566 rel_off_bytes[3],
567 rel_off_bytes[4],
568 rel_off_bytes[5],
569 rel_off_bytes[6],
570 rel_off_bytes[7],
571 ]);
572
573 file.seek(SeekFrom::Start(zip64_eocd_offset))?;
575
576 let sig = Self::read_u32_le_static(file)?;
577 if sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE {
578 return Err(SZipError::InvalidFormat(format!(
579 "Invalid ZIP64 EOCD signature: 0x{:08x}",
580 sig
581 )));
582 }
583
584 let _size = {
586 let mut buf = [0u8; 8];
587 file.read_exact(&mut buf)?;
588 u64::from_le_bytes(buf)
589 };
590
591 file.seek(SeekFrom::Current(12))?;
593
594 let total_entries = {
596 let mut buf = [0u8; 8];
597 file.read_exact(&mut buf)?;
598 u64::from_le_bytes(buf)
599 };
600
601 {
603 let mut buf = [0u8; 8];
604 file.read_exact(&mut buf)?;
605 }
607
608 let cd_size = {
610 let mut buf = [0u8; 8];
611 file.read_exact(&mut buf)?;
612 u64::from_le_bytes(buf)
613 };
614
615 let cd_offset = {
617 let mut buf = [0u8; 8];
618 file.read_exact(&mut buf)?;
619 u64::from_le_bytes(buf)
620 };
621
622 Ok((total_entries, cd_size, cd_offset))
623 }
624
625 fn find_eocd(file: &mut BufReader<File>) -> Result<u64> {
627 let file_size = file.seek(SeekFrom::End(0))?;
628
629 let search_start = file_size.saturating_sub(65557);
631 file.seek(SeekFrom::Start(search_start))?;
632
633 let mut buffer = Vec::new();
634 file.read_to_end(&mut buffer)?;
635
636 for i in (0..buffer.len().saturating_sub(3)).rev() {
638 if buffer[i] == 0x50
639 && buffer[i + 1] == 0x4b
640 && buffer[i + 2] == 0x05
641 && buffer[i + 3] == 0x06
642 {
643 return Ok(search_start + i as u64);
644 }
645 }
646
647 Err(SZipError::InvalidFormat(
648 "End of central directory not found".to_string(),
649 ))
650 }
651
652 fn read_u16_le(&mut self) -> Result<u16> {
653 let mut buf = [0u8; 2];
654 self.file.read_exact(&mut buf)?;
655 Ok(u16::from_le_bytes(buf))
656 }
657
658 fn read_u32_le(&mut self) -> Result<u32> {
659 let mut buf = [0u8; 4];
660 self.file.read_exact(&mut buf)?;
661 Ok(u32::from_le_bytes(buf))
662 }
663
664 fn read_u16_le_static(file: &mut BufReader<File>) -> Result<u16> {
665 let mut buf = [0u8; 2];
666 file.read_exact(&mut buf)?;
667 Ok(u16::from_le_bytes(buf))
668 }
669
670 fn read_u32_le_static(file: &mut BufReader<File>) -> Result<u32> {
671 let mut buf = [0u8; 4];
672 file.read_exact(&mut buf)?;
673 Ok(u32::from_le_bytes(buf))
674 }
675
/// Reads the local header's extra field and extracts the AES encryption parameters.
///
/// The file must be positioned at the start of the extra field, which is `extra_len` bytes
/// long. When an AES record (id `0x9901`) is found, the salt and the 2-byte password
/// verification value that follow the extra field are read as well, leaving the file
/// positioned at the first byte of ciphertext.
///
/// Returns `Ok(None)` if `extra_len` is zero (nothing is read) or if the extra field holds no
/// AES record (the extra field has been consumed).
///
/// # Errors
///
/// Returns `SZipError::InvalidFormat` if the AES record is shorter than 7 bytes or names a
/// strength other than AES-256, and propagates I/O errors.
#[cfg(feature = "encryption")]
#[allow(clippy::type_complexity)] // The tuple is private plumbing between two methods.
fn parse_aes_extra_field(
    &mut self,
    extra_len: usize,
) -> Result<Option<(AesStrength, Vec<u8>, [u8; 2])>> {
    if extra_len == 0 {
        return Ok(None);
    }

    let mut extra_buf = vec![0u8; extra_len];
    self.file.read_exact(&mut extra_buf)?;

    // Walk the extra field as a list of (id: u16, len: u16, data) records.
    let mut i = 0usize;
    while i + 4 <= extra_buf.len() {
        let id = u16::from_le_bytes([extra_buf[i], extra_buf[i + 1]]);
        let data_len = usize::from(u16::from_le_bytes([extra_buf[i + 2], extra_buf[i + 3]]));
        i += 4;

        if i + data_len > extra_buf.len() {
            // Truncated record: stop instead of reading out of bounds.
            break;
        }

        if id == 0x9901 {
            // Record data: vendor version (2), vendor id (2), strength (1), method (2).
            if data_len < 7 {
                return Err(SZipError::InvalidFormat(
                    "Invalid AES extra field".to_string(),
                ));
            }

            // The strength is a single byte; the byte after it already belongs to the
            // compression-method field and must not be folded into the strength code.
            let strength_code = extra_buf[i + 4];

            let strength = match strength_code {
                0x03 => AesStrength::Aes256,
                _ => {
                    return Err(SZipError::InvalidFormat(format!(
                        "Unsupported AES strength: {}",
                        strength_code
                    )))
                }
            };

            // The salt and the password verifier sit at the start of the entry's data, right
            // after the extra field. Key material is deliberately never logged.
            let mut salt = vec![0u8; strength.salt_size()];
            self.file.read_exact(&mut salt)?;

            let mut pw_verify = [0u8; 2];
            self.file.read_exact(&mut pw_verify)?;

            return Ok(Some((strength, salt, pw_verify)));
        }

        i += data_len;
    }

    Ok(None)
}
753}