1use crate::error::{Result, SZipError};
7use flate2::read::DeflateDecoder;
8use std::fs::File;
9use std::io::{BufReader, Read, Seek, SeekFrom};
10use std::path::Path;
11
/// Little-endian signature expected at the start of a local file header
/// (bytes `PK\x03\x04` on disk).
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50;

/// Little-endian signature expected at the start of each central directory
/// record (bytes `PK\x01\x02` on disk).
const CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x02014b50;

/// Little-endian signature of the end-of-central-directory record
/// (bytes `PK\x05\x06` on disk).
const END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x06054b50;

/// Little-endian signature of the ZIP64 end-of-central-directory record
/// (bytes `PK\x06\x06` on disk).
const ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x06064b50;
23
/// Metadata for a single archive member, as recorded in the central directory.
///
/// Values are taken from the central directory record; when a 32-bit field is
/// saturated, the 64-bit value from the ZIP64 extra field is used instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZipEntry {
    /// Entry name, decoded lossily as UTF-8.
    pub name: String,
    /// Number of bytes the entry occupies inside the archive.
    pub compressed_size: u64,
    /// Size of the entry as declared by the archive once decompressed.
    pub uncompressed_size: u64,
    /// Raw ZIP compression method id (the reader handles 0, 8 and, with the
    /// `zstd-support` feature, 93).
    pub compression_method: u16,
    /// Absolute file offset of the entry's local file header.
    pub offset: u64,
}
35
/// Reader for a ZIP archive stored in a file.
///
/// The central directory is parsed once when the reader is opened; entry data
/// is read from the file on demand.
pub struct StreamingZipReader {
    /// Buffered handle to the archive file; repositioned by every read call.
    file: BufReader<File>,
    /// Entries collected from the central directory when the reader was opened.
    entries: Vec<ZipEntry>,
}
41
42impl StreamingZipReader {
43 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
45 let mut file = BufReader::new(File::open(path)?);
46
47 let entries = Self::read_central_directory(&mut file)?;
49
50 Ok(StreamingZipReader { file, entries })
51 }
52
53 pub fn entries(&self) -> &[ZipEntry] {
55 &self.entries
56 }
57
58 pub fn find_entry(&self, name: &str) -> Option<&ZipEntry> {
60 self.entries.iter().find(|e| e.name == name)
61 }
62
63 pub fn read_entry(&mut self, entry: &ZipEntry) -> Result<Vec<u8>> {
65 self.file.seek(SeekFrom::Start(entry.offset))?;
67
68 let signature = self.read_u32_le()?;
70 if signature != LOCAL_FILE_HEADER_SIGNATURE {
71 return Err(SZipError::InvalidFormat(
72 "Invalid local file header signature".to_string(),
73 ));
74 }
75
76 self.file.seek(SeekFrom::Current(6))?;
78
79 self.file.seek(SeekFrom::Current(8))?;
81
82 self.file.seek(SeekFrom::Current(8))?;
84
85 let filename_len = self.read_u16_le()? as i64;
87 let extra_len = self.read_u16_le()? as i64;
88
89 self.file
91 .seek(SeekFrom::Current(filename_len + extra_len))?;
92
93 let mut compressed_data = vec![0u8; entry.compressed_size as usize];
95 self.file.read_exact(&mut compressed_data)?;
96
97 let data = if entry.compression_method == 8 {
99 let mut decoder = DeflateDecoder::new(&compressed_data[..]);
101 let mut decompressed = Vec::new();
102 decoder.read_to_end(&mut decompressed)?;
103 decompressed
104 } else if entry.compression_method == 0 {
105 compressed_data
107 } else if entry.compression_method == 93 {
108 #[cfg(feature = "zstd-support")]
110 {
111 zstd::decode_all(&compressed_data[..])?
112 }
113 #[cfg(not(feature = "zstd-support"))]
114 {
115 return Err(SZipError::UnsupportedCompression(entry.compression_method));
116 }
117 } else {
118 return Err(SZipError::UnsupportedCompression(entry.compression_method));
119 };
120
121 Ok(data)
122 }
123
124 pub fn read_entry_by_name(&mut self, name: &str) -> Result<Vec<u8>> {
126 let entry = self
127 .find_entry(name)
128 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
129 .clone();
130
131 self.read_entry(&entry)
132 }
133
134 pub fn read_entry_streaming_by_name(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
137 let entry = self
138 .find_entry(name)
139 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
140 .clone();
141
142 self.read_entry_streaming(&entry)
143 }
144
145 pub fn read_entry_streaming(&mut self, entry: &ZipEntry) -> Result<Box<dyn Read + '_>> {
148 self.file.seek(SeekFrom::Start(entry.offset))?;
150
151 let signature = self.read_u32_le()?;
153 if signature != LOCAL_FILE_HEADER_SIGNATURE {
154 return Err(SZipError::InvalidFormat(
155 "Invalid local file header signature".to_string(),
156 ));
157 }
158
159 self.file.seek(SeekFrom::Current(6))?;
161
162 self.file.seek(SeekFrom::Current(8))?;
164
165 self.file.seek(SeekFrom::Current(8))?;
167
168 let filename_len = self.read_u16_le()? as i64;
170 let extra_len = self.read_u16_le()? as i64;
171
172 self.file
174 .seek(SeekFrom::Current(filename_len + extra_len))?;
175
176 let limited_reader = (&mut self.file).take(entry.compressed_size);
178
179 if entry.compression_method == 8 {
181 Ok(Box::new(DeflateDecoder::new(limited_reader)))
183 } else if entry.compression_method == 0 {
184 Ok(Box::new(limited_reader))
186 } else if entry.compression_method == 93 {
187 #[cfg(feature = "zstd-support")]
189 {
190 Ok(Box::new(zstd::Decoder::new(limited_reader)?))
191 }
192 #[cfg(not(feature = "zstd-support"))]
193 {
194 Err(SZipError::UnsupportedCompression(entry.compression_method))
195 }
196 } else {
197 Err(SZipError::UnsupportedCompression(entry.compression_method))
198 }
199 }
200
201 pub fn read_entry_by_name_streaming(&mut self, name: &str) -> Result<Box<dyn Read + '_>> {
203 let entry = self
204 .find_entry(name)
205 .ok_or_else(|| SZipError::EntryNotFound(name.to_string()))?
206 .clone();
207
208 self.read_entry_streaming(&entry)
209 }
210
211 fn read_central_directory(file: &mut BufReader<File>) -> Result<Vec<ZipEntry>> {
213 let eocd_offset = Self::find_eocd(file)?;
215
216 file.seek(SeekFrom::Start(eocd_offset))?;
218
219 let signature = Self::read_u32_le_static(file)?;
221 if signature != END_OF_CENTRAL_DIRECTORY_SIGNATURE {
222 return Err(SZipError::InvalidFormat(format!(
223 "Invalid end of central directory signature: 0x{:08x}",
224 signature
225 )));
226 }
227
228 file.seek(SeekFrom::Current(4))?;
230
231 let _entries_on_disk = Self::read_u16_le_static(file)?;
233
234 let total_entries_16 = Self::read_u16_le_static(file)?;
238
239 let cd_size_32 = Self::read_u32_le_static(file)?;
241
242 let cd_offset_32 = Self::read_u32_le_static(file)? as u64;
244
245 let mut total_entries = total_entries_16 as usize;
247 let mut cd_offset = cd_offset_32;
248 let _cd_size = cd_size_32 as u64;
249
250 if total_entries_16 == 0xFFFF || cd_size_32 == 0xFFFFFFFF || cd_offset_32 == 0xFFFFFFFF {
251 let (zip64_total_entries, zip64_cd_size, zip64_cd_offset) =
253 Self::read_zip64_eocd(file, eocd_offset)?;
254 total_entries = zip64_total_entries as usize;
255 cd_offset = zip64_cd_offset;
256 let _ = zip64_cd_size;
258 }
259
260 file.seek(SeekFrom::Start(cd_offset))?;
262
263 let mut entries = Vec::with_capacity(total_entries);
265 for _ in 0..total_entries {
266 let signature = Self::read_u32_le_static(file)?;
267 if signature != CENTRAL_DIRECTORY_SIGNATURE {
268 break;
269 }
270
271 file.seek(SeekFrom::Current(6))?;
273
274 let compression_method = Self::read_u16_le_static(file)?;
275
276 file.seek(SeekFrom::Current(8))?;
278
279 let compressed_size_32 = Self::read_u32_le_static(file)? as u64;
281 let uncompressed_size_32 = Self::read_u32_le_static(file)? as u64;
282 let filename_len = Self::read_u16_le_static(file)? as usize;
283 let extra_len = Self::read_u16_le_static(file)? as usize;
284 let comment_len = Self::read_u16_le_static(file)? as usize;
285
286 file.seek(SeekFrom::Current(8))?;
288
289 let mut offset = Self::read_u32_le_static(file)? as u64;
290
291 let mut filename_buf = vec![0u8; filename_len];
293 file.read_exact(&mut filename_buf)?;
294 let name = String::from_utf8_lossy(&filename_buf).to_string();
295
296 let mut extra_buf = vec![0u8; extra_len];
298 if extra_len > 0 {
299 file.read_exact(&mut extra_buf)?;
300 }
301
302 let mut compressed_size = compressed_size_32;
304 let mut uncompressed_size = uncompressed_size_32;
305
306 if compressed_size_32 == 0xFFFFFFFF
307 || uncompressed_size_32 == 0xFFFFFFFF
308 || offset == 0xFFFFFFFF
309 {
310 let mut i = 0usize;
312 while i + 4 <= extra_buf.len() {
313 let id = u16::from_le_bytes([extra_buf[i], extra_buf[i + 1]]);
314 let data_len =
315 u16::from_le_bytes([extra_buf[i + 2], extra_buf[i + 3]]) as usize;
316 i += 4;
317 if i + data_len > extra_buf.len() {
318 break;
319 }
320 if id == 0x0001 {
321 let mut cursor = 0usize;
323 if uncompressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
325 uncompressed_size = u64::from_le_bytes([
326 extra_buf[i + cursor],
327 extra_buf[i + cursor + 1],
328 extra_buf[i + cursor + 2],
329 extra_buf[i + cursor + 3],
330 extra_buf[i + cursor + 4],
331 extra_buf[i + cursor + 5],
332 extra_buf[i + cursor + 6],
333 extra_buf[i + cursor + 7],
334 ]);
335 cursor += 8;
336 }
337 if compressed_size_32 == 0xFFFFFFFF && cursor + 8 <= data_len {
339 compressed_size = u64::from_le_bytes([
340 extra_buf[i + cursor],
341 extra_buf[i + cursor + 1],
342 extra_buf[i + cursor + 2],
343 extra_buf[i + cursor + 3],
344 extra_buf[i + cursor + 4],
345 extra_buf[i + cursor + 5],
346 extra_buf[i + cursor + 6],
347 extra_buf[i + cursor + 7],
348 ]);
349 cursor += 8;
350 }
351 if offset == 0xFFFFFFFF && cursor + 8 <= data_len {
353 offset = u64::from_le_bytes([
354 extra_buf[i + cursor],
355 extra_buf[i + cursor + 1],
356 extra_buf[i + cursor + 2],
357 extra_buf[i + cursor + 3],
358 extra_buf[i + cursor + 4],
359 extra_buf[i + cursor + 5],
360 extra_buf[i + cursor + 6],
361 extra_buf[i + cursor + 7],
362 ]);
363 }
364 break;
366 }
367 i += data_len;
368 }
369 }
370
371 if comment_len > 0 {
373 file.seek(SeekFrom::Current(comment_len as i64))?;
374 }
375
376 entries.push(ZipEntry {
377 name,
378 compressed_size,
379 uncompressed_size,
380 compression_method,
381 offset,
382 });
383 }
384
385 Ok(entries)
386 }
387
388 fn read_zip64_eocd(file: &mut BufReader<File>, eocd_offset: u64) -> Result<(u64, u64, u64)> {
390 let search_start = eocd_offset.saturating_sub(65557);
392 file.seek(SeekFrom::Start(search_start))?;
393 let mut buffer = Vec::new();
394 file.read_to_end(&mut buffer)?;
395
396 let mut locator_pos: Option<usize> = None;
397 for i in (0..buffer.len().saturating_sub(3)).rev() {
398 if buffer[i] == 0x50
399 && buffer[i + 1] == 0x4b
400 && buffer[i + 2] == 0x06
401 && buffer[i + 3] == 0x07
402 {
403 locator_pos = Some(i);
404 break;
405 }
406 }
407
408 let locator_pos = locator_pos
409 .ok_or_else(|| SZipError::InvalidFormat("ZIP64 EOCD locator not found".to_string()))?;
410
411 let rel_off_bytes = &buffer[locator_pos + 8..locator_pos + 16];
414 let zip64_eocd_offset = u64::from_le_bytes([
415 rel_off_bytes[0],
416 rel_off_bytes[1],
417 rel_off_bytes[2],
418 rel_off_bytes[3],
419 rel_off_bytes[4],
420 rel_off_bytes[5],
421 rel_off_bytes[6],
422 rel_off_bytes[7],
423 ]);
424
425 file.seek(SeekFrom::Start(zip64_eocd_offset))?;
427
428 let sig = Self::read_u32_le_static(file)?;
429 if sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE {
430 return Err(SZipError::InvalidFormat(format!(
431 "Invalid ZIP64 EOCD signature: 0x{:08x}",
432 sig
433 )));
434 }
435
436 let _size = {
438 let mut buf = [0u8; 8];
439 file.read_exact(&mut buf)?;
440 u64::from_le_bytes(buf)
441 };
442
443 file.seek(SeekFrom::Current(12))?;
445
446 let total_entries = {
448 let mut buf = [0u8; 8];
449 file.read_exact(&mut buf)?;
450 u64::from_le_bytes(buf)
451 };
452
453 {
455 let mut buf = [0u8; 8];
456 file.read_exact(&mut buf)?;
457 }
459
460 let cd_size = {
462 let mut buf = [0u8; 8];
463 file.read_exact(&mut buf)?;
464 u64::from_le_bytes(buf)
465 };
466
467 let cd_offset = {
469 let mut buf = [0u8; 8];
470 file.read_exact(&mut buf)?;
471 u64::from_le_bytes(buf)
472 };
473
474 Ok((total_entries, cd_size, cd_offset))
475 }
476
477 fn find_eocd(file: &mut BufReader<File>) -> Result<u64> {
479 let file_size = file.seek(SeekFrom::End(0))?;
480
481 let search_start = file_size.saturating_sub(65557);
483 file.seek(SeekFrom::Start(search_start))?;
484
485 let mut buffer = Vec::new();
486 file.read_to_end(&mut buffer)?;
487
488 for i in (0..buffer.len().saturating_sub(3)).rev() {
490 if buffer[i] == 0x50
491 && buffer[i + 1] == 0x4b
492 && buffer[i + 2] == 0x05
493 && buffer[i + 3] == 0x06
494 {
495 return Ok(search_start + i as u64);
496 }
497 }
498
499 Err(SZipError::InvalidFormat(
500 "End of central directory not found".to_string(),
501 ))
502 }
503
504 fn read_u16_le(&mut self) -> Result<u16> {
505 let mut buf = [0u8; 2];
506 self.file.read_exact(&mut buf)?;
507 Ok(u16::from_le_bytes(buf))
508 }
509
510 fn read_u32_le(&mut self) -> Result<u32> {
511 let mut buf = [0u8; 4];
512 self.file.read_exact(&mut buf)?;
513 Ok(u32::from_le_bytes(buf))
514 }
515
516 fn read_u16_le_static(file: &mut BufReader<File>) -> Result<u16> {
517 let mut buf = [0u8; 2];
518 file.read_exact(&mut buf)?;
519 Ok(u16::from_le_bytes(buf))
520 }
521
522 fn read_u32_le_static(file: &mut BufReader<File>) -> Result<u32> {
523 let mut buf = [0u8; 4];
524 file.read_exact(&mut buf)?;
525 Ok(u32::from_le_bytes(buf))
526 }
527}