1use std::io::{self, Read, Seek, SeekFrom};
9use std::path::PathBuf;
10
11use crate::bytes::Reader;
12use crate::codec::Decoder;
13use crate::crypto::{AesInfo, AesReader, ZipCryptoReader};
14use crate::{FormatError, ZipCoreError};
15
/// Signature expected at the start of the end-of-central-directory (EOCD) record.
const EOCD_SIG: u32 = 0x0605_4b50;
/// Signature expected at the start of every central directory header.
const CD_HEADER_SIG: u32 = 0x0201_4b50;
/// Signature expected at the start of every local file header.
const LFH_SIG: u32 = 0x0403_4b50;
/// Signature expected at the start of the Zip64 EOCD record.
const ZIP64_EOCD_SIG: u32 = 0x0606_4b50;
/// Signature expected at the start of the Zip64 EOCD locator.
const ZIP64_LOCATOR_SIG: u32 = 0x0706_4b50;
/// Extra-field id of the block that carries 64-bit sizes and offsets.
const ZIP64_EXTRA_ID: u16 = 0x0001;
/// Value of a 32-bit header field that defers to the Zip64 data instead.
const U32_SENTINEL: u32 = 0xFFFF_FFFF;
/// Value of the 16-bit EOCD entry count that defers to the Zip64 EOCD record.
const U16_SENTINEL: u16 = 0xFFFF;

/// Number of bytes in an EOCD record, not counting its trailing comment.
const EOCD_MIN: usize = 22;
/// Longest file tail searched for the EOCD: the record plus the largest
/// comment a 16-bit length can describe.
const EOCD_SCAN_MAX: usize = EOCD_MIN + u16::MAX as usize;
/// Number of bytes directly before the EOCD that are read as the Zip64 locator.
const ZIP64_LOCATOR_LEN: usize = 20;
/// Size of the fixed part of a local file header; the name and extra field follow it.
const LFH_FIXED: usize = 30;
/// Largest declared entry count accepted; anything above is rejected with
/// `FormatError::TooManyEntries`.
const MAX_ENTRIES: usize = 16_000_000;
38
/// Compression method recorded in a ZIP header.
///
/// Ids without a dedicated variant are preserved in [`CompressionMethod::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Method id 0.
    Stored,
    /// Method id 8.
    Deflated,
    /// Method id 9.
    Deflate64,
    /// Method id 12.
    Bzip2,
    /// Method id 14.
    Lzma,
    /// Method id 93.
    Zstd,
    /// Method id 95.
    Xz,
    /// Any other method id, kept verbatim.
    Unknown(u16),
}

impl CompressionMethod {
    /// Maps a raw header method id to its variant; unrecognised ids become
    /// [`CompressionMethod::Unknown`] carrying the original value.
    pub(crate) fn from_u16(raw: u16) -> Self {
        // Lookup table of every id that has a dedicated variant.
        const KNOWN: [(u16, CompressionMethod); 7] = [
            (0, CompressionMethod::Stored),
            (8, CompressionMethod::Deflated),
            (9, CompressionMethod::Deflate64),
            (12, CompressionMethod::Bzip2),
            (14, CompressionMethod::Lzma),
            (93, CompressionMethod::Zstd),
            (95, CompressionMethod::Xz),
        ];
        KNOWN
            .iter()
            .find(|entry| entry.0 == raw)
            .map_or(Self::Unknown(raw), |entry| entry.1)
    }
}
75
/// One entry as described by its central directory header.
#[derive(Debug, Clone)]
pub(crate) struct CentralEntry {
    /// Entry name after decoding (see `decode_name`).
    pub(crate) name: String,
    /// Compression method taken from the central header's method field.
    pub(crate) method: CompressionMethod,
    /// General-purpose flag word. This file tests bit 0 (encrypted),
    /// bit 3 (`0x0008`) and bit 11 (`0x0800`, selects UTF-8 name decoding).
    pub(crate) flags: u16,
    /// CRC-32 recorded in the central header.
    pub(crate) crc32: u32,
    /// Compressed size, already replaced by the Zip64 value when the 32-bit field was a sentinel.
    pub(crate) compressed_size: u64,
    /// Uncompressed size, already replaced by the Zip64 value when the 32-bit field was a sentinel.
    pub(crate) uncompressed_size: u64,
    /// Offset of the entry's local file header, Zip64-resolved like the sizes.
    pub(crate) lfh_offset: u64,
    /// Raw modification-time field; used to derive the ZipCrypto check byte.
    pub(crate) last_mod_time: u16,
    /// AES parameters from the `0x9901` extra field; only looked up when the raw method id is 99.
    pub(crate) aes: Option<AesInfo>,
}
92
93impl CentralEntry {
94 fn is_dir(&self) -> bool {
95 self.name.ends_with('/') || self.name.ends_with('\\')
96 }
97}
98
/// Archive-level facts gathered while locating and reading the central directory.
#[derive(Debug, Clone)]
pub struct ArchiveSummary {
    /// Total length of the underlying stream, in bytes.
    pub file_len: u64,
    /// Offset of the first byte of the central directory.
    pub central_dir_offset: u64,
    /// Size of the central directory, in bytes.
    pub central_dir_size: u64,
    /// Offset one past the EOCD record, counting its declared comment length.
    pub eocd_end_offset: u64,
    /// Comment length declared in the EOCD record.
    pub comment_len: u16,
    /// Disk number field from the EOCD (or from the Zip64 EOCD when that record is used).
    pub disk_number: u32,
    /// Disk on which the central directory starts, from the same record as `disk_number`.
    pub cd_start_disk: u32,
}
119
/// A ZIP archive whose central directory has been parsed up front.
pub struct ZipArchive<R> {
    /// Underlying stream; entry payloads are read from it on demand.
    reader: R,
    /// Entries in central directory order.
    entries: Vec<CentralEntry>,
    /// Archive-level layout information captured during parsing.
    summary: ArchiveSummary,
}
126
127impl<R: Read + Seek> ZipArchive<R> {
128 pub fn new(mut reader: R) -> Result<Self, ZipCoreError> {
130 let file_len = reader.seek(SeekFrom::End(0))?;
131 let (entries, summary) = parse_central_directory(&mut reader, file_len)?;
132 Ok(Self {
133 reader,
134 entries,
135 summary,
136 })
137 }
138
139 pub fn summary(&self) -> &ArchiveSummary {
141 &self.summary
142 }
143
144 pub fn len(&self) -> usize {
146 self.entries.len()
147 }
148
149 pub fn is_empty(&self) -> bool {
151 self.entries.is_empty()
152 }
153
154 pub fn file_names(&self) -> impl Iterator<Item = &str> {
156 self.entries.iter().map(|e| e.name.as_str())
157 }
158
159 pub fn by_index(&mut self, i: usize) -> Result<ZipFile<'_>, ZipCoreError> {
161 let meta = self
162 .entries
163 .get(i)
164 .ok_or(ZipCoreError::IndexOutOfBounds(i))?
165 .clone();
166 self.open(meta)
167 }
168
169 pub fn by_name(&mut self, name: &str) -> Result<ZipFile<'_>, ZipCoreError> {
171 let meta = self
172 .entries
173 .iter()
174 .find(|e| e.name == name)
175 .ok_or_else(|| ZipCoreError::EntryNotFound(name.to_string()))?
176 .clone();
177 self.open(meta)
178 }
179
180 pub fn by_index_decrypt(
183 &mut self,
184 i: usize,
185 password: &[u8],
186 ) -> Result<ZipFile<'_>, ZipCoreError> {
187 let meta = self
188 .entries
189 .get(i)
190 .ok_or(ZipCoreError::IndexOutOfBounds(i))?
191 .clone();
192 self.open_decrypt(meta, password)
193 }
194
195 pub fn by_name_decrypt(
197 &mut self,
198 name: &str,
199 password: &[u8],
200 ) -> Result<ZipFile<'_>, ZipCoreError> {
201 let meta = self
202 .entries
203 .iter()
204 .find(|e| e.name == name)
205 .ok_or_else(|| ZipCoreError::EntryNotFound(name.to_string()))?
206 .clone();
207 self.open_decrypt(meta, password)
208 }
209
210 pub fn structural_view(&mut self) -> Result<Vec<EntryLayout>, ZipCoreError> {
215 let metas = self.entries.clone();
216 let mut out = Vec::with_capacity(metas.len());
217 for (index, m) in metas.iter().enumerate() {
218 let (local, data_start) = read_lfh_fields(&mut self.reader, m.lfh_offset)?;
219 out.push(EntryLayout {
220 index,
221 lfh_offset: m.lfh_offset,
222 data_start,
223 central: HeaderFields {
224 name: m.name.clone(),
225 method: m.method,
226 flags: m.flags,
227 crc32: m.crc32,
228 compressed_size: m.compressed_size,
229 uncompressed_size: m.uncompressed_size,
230 },
231 local,
232 });
233 }
234 Ok(out)
235 }
236
237 fn open(&mut self, meta: CentralEntry) -> Result<ZipFile<'_>, ZipCoreError> {
238 if meta.flags & 0x0001 != 0 {
239 return Err(ZipCoreError::EncryptedNoPassword(meta.name.clone()));
240 }
241 let (_local, data_start) = read_lfh_fields(&mut self.reader, meta.lfh_offset)?;
242 self.reader.seek(SeekFrom::Start(data_start))?;
243 let limited: Box<dyn Read + '_> = Box::new((&mut self.reader).take(meta.compressed_size));
244 let decoder = Decoder::new(meta.method, meta.uncompressed_size, limited)?;
245 Ok(ZipFile {
246 data_start,
247 decoder,
248 hasher: crc32fast::Hasher::new(),
249 bytes_out: 0,
250 verified: false,
251 verify_crc: true,
252 meta,
253 })
254 }
255
256 fn open_decrypt(
257 &mut self,
258 meta: CentralEntry,
259 password: &[u8],
260 ) -> Result<ZipFile<'_>, ZipCoreError> {
261 if meta.flags & 0x0001 == 0 && meta.aes.is_none() {
263 return self.open(meta);
264 }
265 let (_local, data_start) = read_lfh_fields(&mut self.reader, meta.lfh_offset)?;
266 self.reader.seek(SeekFrom::Start(data_start))?;
267 let take = (&mut self.reader).take(meta.compressed_size);
268 let (reader, method, verify_crc): (Box<dyn Read + '_>, CompressionMethod, bool) =
269 if let Some(aes) = meta.aes {
270 let r = AesReader::new(take, password, aes, meta.compressed_size, &meta.name)?;
271 (
274 Box::new(r),
275 CompressionMethod::from_u16(aes.actual_method),
276 !aes.is_ae2,
277 )
278 } else {
279 let check = zipcrypto_check_byte(meta.flags, meta.crc32, meta.last_mod_time);
282 let r = ZipCryptoReader::new(take, password, check, &meta.name)?;
283 (Box::new(r), meta.method, true)
284 };
285 let decoder = Decoder::new(method, meta.uncompressed_size, reader)?;
286 Ok(ZipFile {
287 data_start,
288 decoder,
289 hasher: crc32fast::Hasher::new(),
290 bytes_out: 0,
291 verified: false,
292 verify_crc,
293 meta,
294 })
295 }
296}
297
/// The header values that are recorded both in a local file header and in the
/// matching central directory header, so the two copies can be compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HeaderFields {
    /// Decoded entry name.
    pub name: String,
    /// Compression method.
    pub method: CompressionMethod,
    /// General-purpose flag word.
    pub flags: u16,
    /// Recorded CRC-32.
    pub crc32: u32,
    /// Recorded compressed size.
    pub compressed_size: u64,
    /// Recorded uncompressed size.
    pub uncompressed_size: u64,
}
315
/// Physical layout of one entry together with both copies of its header values.
#[derive(Debug, Clone)]
pub struct EntryLayout {
    /// Position of the entry in the central directory.
    pub index: usize,
    /// Offset of the entry's local file header.
    pub lfh_offset: u64,
    /// Offset of the first payload byte, just past the local header's name and extra field.
    pub data_start: u64,
    /// Values taken from the central directory header.
    pub central: HeaderFields,
    /// Values taken from the local file header.
    pub local: HeaderFields,
}
331
332fn read_lfh_fields<R: Read + Seek>(
336 reader: &mut R,
337 lfh_offset: u64,
338) -> Result<(HeaderFields, u64), ZipCoreError> {
339 reader.seek(SeekFrom::Start(lfh_offset))?;
340 let mut fixed = [0u8; LFH_FIXED];
341 reader.read_exact(&mut fixed)?;
342 let mut r = Reader::new(&fixed);
343 if r.u32()? != LFH_SIG {
344 return Err(FormatError::BadSignature {
345 what: "local file header",
346 offset: lfh_offset,
347 }
348 .into());
349 }
350 let _version_needed = r.u16()?;
351 let flags = r.u16()?;
352 let method = CompressionMethod::from_u16(r.u16()?);
353 let _mod_time = r.u16()?;
354 let _mod_date = r.u16()?;
355 let crc32 = r.u32()?;
356 let compressed_size = u64::from(r.u32()?);
357 let uncompressed_size = u64::from(r.u32()?);
358 let name_len = usize::from(r.u16()?);
359 let extra_len = usize::from(r.u16()?);
360
361 let mut name_buf = vec![0u8; name_len];
362 reader.read_exact(&mut name_buf)?;
363 let name = decode_name(&name_buf, flags);
364 let data_start = lfh_offset + LFH_FIXED as u64 + name_len as u64 + extra_len as u64;
365
366 Ok((
367 HeaderFields {
368 name,
369 method,
370 flags,
371 crc32,
372 compressed_size,
373 uncompressed_size,
374 },
375 data_start,
376 ))
377}
378
/// Fields of the classic (non-Zip64) end-of-central-directory record that the parser keeps.
struct Eocd32 {
    /// Disk number field.
    disk_number: u16,
    /// Disk on which the central directory starts.
    cd_start_disk: u16,
    /// Total entry count; `U16_SENTINEL` defers to the Zip64 record.
    total_entries: u16,
    /// Central directory size in bytes; `U32_SENTINEL` defers to the Zip64 record.
    cd_size: u32,
    /// Central directory offset; `U32_SENTINEL` defers to the Zip64 record.
    cd_offset: u32,
    /// Declared length of the comment that follows the record.
    comment_len: u16,
}
389
390fn parse_central_directory<R: Read + Seek>(
391 reader: &mut R,
392 file_len: u64,
393) -> Result<(Vec<CentralEntry>, ArchiveSummary), ZipCoreError> {
394 let scan_len = file_len.min(EOCD_SCAN_MAX as u64);
395 if scan_len < EOCD_MIN as u64 {
396 return Err(FormatError::NoEocd.into());
397 }
398 let scan_start = file_len - scan_len;
399 reader.seek(SeekFrom::Start(scan_start))?;
400 let mut tail = vec![0u8; scan_len as usize];
401 reader.read_exact(&mut tail)?;
402
403 let eocd_rel = find_eocd(&tail).ok_or(FormatError::NoEocd)?;
404 let eocd = parse_eocd(&tail[eocd_rel..])?;
405 let eocd_end_offset =
408 scan_start + eocd_rel as u64 + EOCD_MIN as u64 + u64::from(eocd.comment_len);
409
410 let (cd_offset, cd_size, total_entries, disk_number, cd_start_disk) = if eocd.cd_offset
413 == U32_SENTINEL
414 || eocd.cd_size == U32_SENTINEL
415 || eocd.total_entries == U16_SENTINEL
416 {
417 resolve_zip64_eocd(reader, &tail, eocd_rel)?
418 } else {
419 (
420 u64::from(eocd.cd_offset),
421 u64::from(eocd.cd_size),
422 usize::from(eocd.total_entries),
423 u32::from(eocd.disk_number),
424 u32::from(eocd.cd_start_disk),
425 )
426 };
427
428 match cd_offset.checked_add(cd_size) {
429 Some(end) if end <= file_len => {}
430 _ => return Err(FormatError::CentralDirOutOfRange { cd_offset, cd_size }.into()),
431 }
432 if total_entries > MAX_ENTRIES {
433 return Err(FormatError::TooManyEntries(total_entries).into());
434 }
435
436 reader.seek(SeekFrom::Start(cd_offset))?;
437 let mut cd = vec![0u8; cd_size as usize];
438 reader.read_exact(&mut cd)?;
439
440 let entries = parse_cd_entries(&cd, total_entries)?;
441 let summary = ArchiveSummary {
442 file_len,
443 central_dir_offset: cd_offset,
444 central_dir_size: cd_size,
445 eocd_end_offset,
446 comment_len: eocd.comment_len,
447 disk_number,
448 cd_start_disk,
449 };
450 Ok((entries, summary))
451}
452
453fn find_eocd(tail: &[u8]) -> Option<usize> {
455 if tail.len() < EOCD_MIN {
456 return None; }
458 let sig = EOCD_SIG.to_le_bytes();
459 (0..=tail.len() - EOCD_MIN)
461 .rev()
462 .find(|&i| tail[i..i + 4] == sig)
463}
464
465fn parse_eocd(buf: &[u8]) -> Result<Eocd32, ZipCoreError> {
467 let mut r = Reader::new(buf);
468 if r.u32()? != EOCD_SIG {
469 return Err(FormatError::NoEocd.into()); }
471 let disk_number = r.u16()?;
472 let cd_start_disk = r.u16()?;
473 let _entries_this_disk = r.u16()?;
474 let total_entries = r.u16()?;
475 let cd_size = r.u32()?;
476 let cd_offset = r.u32()?;
477 let comment_len = r.u16()?;
478 Ok(Eocd32 {
479 disk_number,
480 cd_start_disk,
481 total_entries,
482 cd_size,
483 cd_offset,
484 comment_len,
485 })
486}
487
488fn resolve_zip64_eocd<R: Read + Seek>(
492 reader: &mut R,
493 tail: &[u8],
494 eocd_rel: usize,
495) -> Result<(u64, u64, usize, u32, u32), ZipCoreError> {
496 if eocd_rel < ZIP64_LOCATOR_LEN {
497 return Err(FormatError::Zip64Unsupported.into());
498 }
499 let mut loc = Reader::new(&tail[eocd_rel - ZIP64_LOCATOR_LEN..eocd_rel]);
500 if loc.u32()? != ZIP64_LOCATOR_SIG {
501 return Err(FormatError::Zip64Unsupported.into());
502 }
503 let _disk = loc.u32()?;
504 let z64_eocd_offset = loc.u64()?;
505
506 reader.seek(SeekFrom::Start(z64_eocd_offset))?;
507 let mut rec = [0u8; 56];
508 reader.read_exact(&mut rec)?;
509 let mut r = Reader::new(&rec);
510 if r.u32()? != ZIP64_EOCD_SIG {
511 return Err(FormatError::BadSignature {
512 what: "Zip64 EOCD record",
513 offset: z64_eocd_offset,
514 }
515 .into());
516 }
517 let _record_size = r.u64()?;
518 let _version_made_by = r.u16()?;
519 let _version_needed = r.u16()?;
520 let disk_number = r.u32()?;
521 let cd_start_disk = r.u32()?;
522 let _entries_this_disk = r.u64()?;
523 let total_entries = r.u64()?;
524 let cd_size = r.u64()?;
525 let cd_offset = r.u64()?;
526 let total =
527 usize::try_from(total_entries).map_err(|_| FormatError::TooManyEntries(usize::MAX))?;
528 Ok((cd_offset, cd_size, total, disk_number, cd_start_disk))
529}
530
531fn parse_cd_entries(cd: &[u8], total_entries: usize) -> Result<Vec<CentralEntry>, ZipCoreError> {
533 let mut r = Reader::new(cd);
534 let mut entries = Vec::new();
535 for _ in 0..total_entries {
536 if r.remaining() < 46 {
537 return Err(FormatError::Truncated.into());
538 }
539 if r.u32()? != CD_HEADER_SIG {
540 return Err(FormatError::BadSignature {
541 what: "central directory header",
542 offset: (cd.len() - r.remaining()) as u64,
543 }
544 .into());
545 }
546 let _version_made_by = r.u16()?;
547 let _version_needed = r.u16()?;
548 let flags = r.u16()?;
549 let method_raw = r.u16()?;
550 let method = CompressionMethod::from_u16(method_raw);
551 let last_mod_time = r.u16()?;
552 let _mod_date = r.u16()?;
553 let crc32 = r.u32()?;
554 let compressed_size32 = r.u32()?;
555 let uncompressed_size32 = r.u32()?;
556 let name_len = usize::from(r.u16()?);
557 let extra_len = usize::from(r.u16()?);
558 let comment_len = usize::from(r.u16()?);
559 let _disk_start = r.u16()?;
560 let _internal_attrs = r.u16()?;
561 let _external_attrs = r.u32()?;
562 let lfh_offset32 = r.u32()?;
563
564 let name_bytes = r.take(name_len)?;
565 let extra = r.take(extra_len)?;
566 let _comment = r.take(comment_len)?;
567
568 let mut uncompressed_size = u64::from(uncompressed_size32);
572 let mut compressed_size = u64::from(compressed_size32);
573 let mut lfh_offset = u64::from(lfh_offset32);
574 if uncompressed_size32 == U32_SENTINEL
575 || compressed_size32 == U32_SENTINEL
576 || lfh_offset32 == U32_SENTINEL
577 {
578 apply_zip64_extra(
579 extra,
580 uncompressed_size32 == U32_SENTINEL,
581 compressed_size32 == U32_SENTINEL,
582 lfh_offset32 == U32_SENTINEL,
583 &mut uncompressed_size,
584 &mut compressed_size,
585 &mut lfh_offset,
586 )?;
587 }
588
589 let name = decode_name(name_bytes, flags);
593 let aes = if method_raw == 99 {
596 parse_aes_extra(extra)
597 } else {
598 None
599 };
600
601 entries.push(CentralEntry {
602 name,
603 method,
604 flags,
605 crc32,
606 compressed_size,
607 uncompressed_size,
608 lfh_offset,
609 last_mod_time,
610 aes,
611 });
612 }
613 Ok(entries)
614}
615
616fn apply_zip64_extra(
621 extra: &[u8],
622 need_uncompressed: bool,
623 need_compressed: bool,
624 need_offset: bool,
625 uncompressed_size: &mut u64,
626 compressed_size: &mut u64,
627 lfh_offset: &mut u64,
628) -> Result<(), ZipCoreError> {
629 let mut r = Reader::new(extra);
630 while r.remaining() >= 4 {
631 let id = r.u16()?;
632 let size = usize::from(r.u16()?);
633 if id == ZIP64_EXTRA_ID {
634 let mut z = Reader::new(r.take(size)?);
635 if need_uncompressed {
636 *uncompressed_size = z.u64()?;
637 }
638 if need_compressed {
639 *compressed_size = z.u64()?;
640 }
641 if need_offset {
642 *lfh_offset = z.u64()?;
643 }
644 return Ok(());
645 }
646 r.skip(size)?;
647 }
648 Err(FormatError::Zip64Inconsistent.into())
649}
650
651fn parse_aes_extra(extra: &[u8]) -> Option<AesInfo> {
654 let mut r = Reader::new(extra);
655 while r.remaining() >= 4 {
656 let id = r.u16().ok()?;
657 let size = usize::from(r.u16().ok()?);
658 if id == 0x9901 {
659 let data = r.take(size).ok()?;
660 let mut d = Reader::new(data);
661 let version = d.u16().ok()?; let _vendor = d.u16().ok()?; let strength = d.take(1).ok()?[0];
664 let actual_method = d.u16().ok()?;
665 return Some(AesInfo {
666 strength,
667 actual_method,
668 is_ae2: version == 2,
669 });
670 }
671 r.skip(size).ok()?;
672 }
673 None
674}
675
/// Picks the byte a ZipCrypto header is checked against.
///
/// When flag bit 3 (`0x0008`) is set, this is the high byte of
/// `last_mod_time`; otherwise it is the most significant byte of `crc32`.
fn zipcrypto_check_byte(flags: u16, crc32: u32, last_mod_time: u16) -> u8 {
    let uses_mod_time = flags & 0x0008 != 0;
    // Big-endian byte 0 is the most significant byte of either value.
    if uses_mod_time {
        last_mod_time.to_be_bytes()[0]
    } else {
        crc32.to_be_bytes()[0]
    }
}
686
687fn decode_name(bytes: &[u8], flags: u16) -> String {
690 if flags & 0x0800 != 0 || bytes.is_ascii() {
692 return String::from_utf8_lossy(bytes).into_owned();
693 }
694 bytes.iter().map(|&b| crate::cp437::decode(b)).collect()
695}
696
/// A readable view of one archive entry; reading yields decompressed bytes.
pub struct ZipFile<'a> {
    /// Central directory metadata of the entry.
    meta: CentralEntry,
    /// Offset of the first payload byte in the archive.
    data_start: u64,
    /// Decoder that produces the entry's bytes from the (optionally decrypted) payload.
    decoder: Decoder<Box<dyn Read + 'a>>,
    /// Running CRC-32 over every byte handed to the caller.
    hasher: crc32fast::Hasher,
    /// Number of bytes handed to the caller so far.
    bytes_out: u64,
    /// Set once end of stream has been seen, so the CRC check runs only once.
    verified: bool,
    /// Whether the CRC is compared against the recorded value at end of stream.
    verify_crc: bool,
}
710
711impl ZipFile<'_> {
712 pub fn name(&self) -> &str {
714 &self.meta.name
715 }
716
717 pub fn compression(&self) -> CompressionMethod {
719 self.meta.method
720 }
721
722 pub fn size(&self) -> u64 {
724 self.meta.uncompressed_size
725 }
726
727 pub fn compressed_size(&self) -> u64 {
729 self.meta.compressed_size
730 }
731
732 pub fn crc32(&self) -> u32 {
734 self.meta.crc32
735 }
736
737 pub fn data_start(&self) -> u64 {
740 self.data_start
741 }
742
743 pub fn flags(&self) -> u16 {
745 self.meta.flags
746 }
747
748 pub fn is_dir(&self) -> bool {
750 self.meta.is_dir()
751 }
752
753 pub fn enclosed_name(&self) -> Option<PathBuf> {
758 enclosed_name(&self.meta.name)
759 }
760}
761
/// Turns a stored entry name into a relative path that cannot escape the
/// directory it is later joined onto.
///
/// Both `/` and `\` are treated as separators; empty and `.` components are
/// dropped. Returns `None` when the name
/// - is empty or contains a NUL character,
/// - starts with a separator (absolute path),
/// - has a `..` component,
/// - has ANY component starting with a drive prefix such as `C:`, or
/// - has no components left after normalisation.
///
/// The drive-prefix check covers every component, not just the first: on
/// Windows, `PathBuf::push` with a prefixed component replaces everything
/// accumulated so far, so `a/C:/x` would otherwise leave the target directory.
fn enclosed_name(name: &str) -> Option<PathBuf> {
    /// `true` when `component` begins with an ASCII letter followed by `:`.
    fn has_drive_prefix(component: &str) -> bool {
        matches!(
            component.as_bytes(),
            [letter, b':', ..] if letter.is_ascii_alphabetic()
        )
    }

    if name.is_empty() || name.contains('\0') {
        return None;
    }
    if name.starts_with(['/', '\\']) {
        return None;
    }
    let mut out = PathBuf::new();
    for comp in name.split(['/', '\\']) {
        match comp {
            "" | "." => {}
            ".." => return None,
            other if has_drive_prefix(other) => return None,
            other => out.push(other),
        }
    }
    if out.as_os_str().is_empty() {
        return None;
    }
    Some(out)
}
789
790impl Read for ZipFile<'_> {
791 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
792 let n = self.decoder.read(buf)?;
793 if n == 0 {
794 if !self.verified {
795 self.verified = true;
796 let actual = self.hasher.clone().finalize();
797 if self.verify_crc && actual != self.meta.crc32 {
798 return Err(io::Error::other(ZipCoreError::CrcMismatch {
799 entry: self.meta.name.clone(),
800 expected: self.meta.crc32,
801 actual,
802 }));
803 }
804 }
805 return Ok(0);
806 }
807 self.hasher.update(&buf[..n]);
808 self.bytes_out += n as u64;
809 Ok(n)
810 }
811}
812
#[cfg(test)]
mod tests {
    use super::zipcrypto_check_byte;

    /// Without flag bit 3 the check byte is the top byte of the CRC; with it,
    /// the high byte of the modification time.
    #[test]
    fn check_byte_selects_crc_or_modtime() {
        // (flags, crc32, last_mod_time, expected check byte)
        let cases: [(u16, u32, u16, u8); 2] = [
            (0x0000, 0xAB12_3456, 0x7890, 0xAB),
            (0x0008, 0xAB12_3456, 0xCD90, 0xCD),
        ];
        for (flags, crc32, last_mod_time, expected) in cases {
            assert_eq!(zipcrypto_check_byte(flags, crc32, last_mod_time), expected);
        }
    }
}