streaming_zip/
lib.rs

// https://en.wikipedia.org/wiki/ZIP_(file_format)
2
3use std::io::Write;
4use std::io::Read;
5use std::io::Error;
6use std::io::ErrorKind;
7use std::io::Result;
8use std::path::Path;
9use std::fs::File;
10use std::os::unix::ffi::OsStringExt;
11use chrono::NaiveDateTime;
12use chrono::DateTime;
13use chrono::offset::Utc;
14use chrono::Datelike;
15use chrono::Timelike;
16use crc::{Crc, Digest};
17use miniz_oxide::deflate::core::CompressorOxide;
18use miniz_oxide::deflate::stream::deflate;
19use miniz_oxide::MZFlush;
20use miniz_oxide::MZStatus;
21use miniz_oxide::DataFormat;
22
23const CRC32: Crc<u32> = Crc::<u32>::new(&crc::CRC_32_ISO_HDLC);
24
/// CRC-32 and size triple of one archive entry.
///
/// The same three values are serialised in several places (inside file
/// headers and as the trailing data descriptor record), which is why
/// [`DataDescriptor::write`] takes flags selecting the exact layout.
#[derive(Debug, Clone, Default)]
struct DataDescriptor {
    /// CRC-32 of the uncompressed data.
    crc: u32,
    /// Number of bytes the entry occupies in the archive.
    compressed_size: u64,
    /// Number of bytes of the original data.
    uncompressed_size: u64,
}

impl DataDescriptor {
    /// Serialises the CRC followed by the compressed and uncompressed sizes.
    ///
    /// * `with_signature` - prepend the `PK\x07\x08` data descriptor signature.
    /// * `u64_fields` - write both sizes as 8-byte little-endian values.
    /// * `is_zip64` - only consulted when `u64_fields` is false: write
    ///   `0xFFFF_FFFF` in both 4-byte size fields instead of the real sizes.
    ///
    /// Returns the number of bytes written to `handle`.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidInput` when the real sizes have to be stored
    /// in 4-byte fields but do not fit; nothing is written in that case.
    /// Any error reported by `handle` is passed through.
    fn write<W: Write>(&self, handle: &mut W, with_signature: bool, u64_fields: bool, is_zip64: bool) -> Result<usize> {
        let stores_real_sizes_in_32_bits = !u64_fields && !is_zip64;
        if stores_real_sizes_in_32_bits {
            // Check before emitting anything: a truncating cast would produce a
            // record that looks valid but describes the wrong amount of data.
            let limit = u64::from(u32::MAX);
            if self.compressed_size > limit || self.uncompressed_size > limit {
                return Err(Error::new(
                    ErrorKind::InvalidInput,
                    "entry size does not fit in 32 bits and zip64 is not enabled for it",
                ));
            }
        }

        let mut written = 0;
        if with_signature {
            handle.write_all(b"PK\x07\x08")?; // data descriptor signature
            written += 4;
        }
        handle.write_all(&self.crc.to_le_bytes())?;
        written += 4;

        if u64_fields {
            handle.write_all(&self.compressed_size.to_le_bytes())?;
            handle.write_all(&self.uncompressed_size.to_le_bytes())?;
            written += 16;
        } else {
            let (compressed, uncompressed) = if is_zip64 {
                (u32::MAX, u32::MAX)
            } else {
                // Lossless: both values were range-checked above.
                (self.compressed_size as u32, self.uncompressed_size as u32)
            };
            handle.write_all(&compressed.to_le_bytes())?;
            handle.write_all(&uncompressed.to_le_bytes())?;
            written += 8;
        }
        Ok(written)
    }
}
60
61#[derive(Debug, Clone)]
62struct FileHeader {
63    name: Vec<u8>,
64    last_modified: NaiveDateTime,
65    data_descriptor: Option<DataDescriptor>,
66    file_header_start: u64,
67    compression: CompressionMode,
68    is_zip64: bool
69}
70
71impl FileHeader {
72    fn write<W: Write>(&self, handle: &mut W, is_central: bool) -> Result<usize> {
73        let mut written = 0;
74        if is_central {
75            handle.write_all(b"PK\x01\x02")?; // Central directory file header signature
76            written += 4;
77        } else {
78            handle.write_all(b"PK\x03\x04")?; // Local file header signature
79            written += 4;
80        }
81        if is_central {
82            if self.is_zip64 {
83                handle.write_all(&45u16.to_le_bytes())?; // Version made by => 4.5
84                written += 2;
85            } else {
86                handle.write_all(&10u16.to_le_bytes())?; // Version made by => 1.0
87                written += 2;
88            }
89        }
90        if self.is_zip64 {
91            handle.write_all(&45u16.to_le_bytes())?; // Version needed to extract (minimum) => 4.5
92            written += 2;
93        } else {
94            handle.write_all(&10u16.to_le_bytes())?; // Version needed to extract (minimum) => 1.0
95            written += 2;
96        }
97        handle.write_all(&0b0000_1000u16.to_le_bytes())?; // General purpose bit flag => enable data descriptor
98        written += 2;
99        let compression_num: u16 = match self.compression {
100            CompressionMode::Store => 0,
101            CompressionMode::Deflate(_) => 8,
102        };
103        handle.write_all(&compression_num.to_le_bytes())?; // Compression method
104        written += 2;
105        let timepart = ((self.last_modified.second() as u16) >> 1) | ((self.last_modified.minute() as u16) << 5) | ((self.last_modified.hour() as u16) << 11);
106        let datepart = (self.last_modified.day() as u16) | ((self.last_modified.month() as u16) << 5) | ((self.last_modified.year() as u16 - 1980) << 9);
107        handle.write_all(&timepart.to_le_bytes())?; // File last modification time
108        written += 2;
109        handle.write_all(&datepart.to_le_bytes())?; // File last modification date
110        written += 2;
111        written += self.data_descriptor.clone().unwrap_or_default().write(handle, false, false, self.is_zip64)?;
112        handle.write_all(&(self.name.len() as u16).to_le_bytes())?; // File name length
113        written += 2;
114        if self.is_zip64 {
115            handle.write_all(&28u16.to_le_bytes())?; // Extra field length
116            written += 2;
117        } else {
118            handle.write_all(&0u16.to_le_bytes())?; // Extra field length
119            written += 2;
120        }
121        if is_central {
122            handle.write_all(&0u16.to_le_bytes())?; // File comment length
123            written += 2;
124            handle.write_all(&0u16.to_le_bytes())?; // Disk number where file starts
125            written += 2;
126            handle.write_all(&0u16.to_le_bytes())?; // Internal file attributes
127            written += 2;
128            handle.write_all(&0u32.to_le_bytes())?; // External file attributes
129            written += 4;
130            if self.is_zip64 {
131                handle.write_all(&u32::MAX.to_le_bytes())?; // Relative offset of local file header
132                written += 4;
133            } else {
134                handle.write_all(&(self.file_header_start as u32).to_le_bytes())?; // Relative offset of local file header
135                written += 4;
136            }
137        }
138        handle.write_all(&self.name)?; // File name
139        written += self.name.len();
140        if self.is_zip64 {
141            handle.write_all(&1u16.to_le_bytes())?; // Extra field header
142            written += 2;
143            handle.write_all(&24u16.to_le_bytes())?; // Size of the extra field chunk
144            written += 2;
145            let dd = self.data_descriptor.clone().unwrap_or_default();
146            handle.write_all(&dd.uncompressed_size.to_le_bytes())?; // Original uncompressed file size
147            written += 8;
148            handle.write_all(&dd.compressed_size.to_le_bytes())?; // Size of compressed data
149            written += 8;
150            handle.write_all(&self.file_header_start.to_le_bytes())?; // Offset of local header record
151            written += 8;
152        }
153        Ok(written)
154    }
155}
156
/// How the data of an archive entry is stored.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMode {
    /// Data is copied into the archive unchanged (ZIP method 0).
    Store,
    /// Data is compressed with raw deflate (ZIP method 8). The value is the
    /// compression level handed to the miniz_oxide compressor
    /// (presumably 0..=10; confirm against the miniz_oxide documentation).
    Deflate(u8),
}
162
163pub struct Archive<W: Write> {
164    compressed_buf: [u8; 4096],
165    files: Vec<FileHeader>,
166    written: usize,
167    inner: W,
168    intermediate_digest: Option<Digest<'static, u32>>,
169    intermediate_compressor: Option<CompressorOxide>,
170    intermediate_uncompressed_size: u64,
171    intermediate_compressed_size: u64
172}
173
174impl<W: Write> Archive<W> {
175    pub fn new(inner: W) -> Archive<W> {
176        Archive {
177            compressed_buf: [0; 4096],
178            files: Vec::new(),
179            written: 0,
180            inner,
181            intermediate_digest: None,
182            intermediate_compressor: None,
183            intermediate_uncompressed_size: 0,
184            intermediate_compressed_size: 0,
185        }
186    }
187
188    pub fn start_new_file(&mut self, name: Vec<u8>, last_modified: NaiveDateTime, compression: CompressionMode, use_zip64: bool) -> Result<()> {
189        let file = FileHeader {
190            name,
191            last_modified,
192            data_descriptor: None,
193            file_header_start: self.written as u64,
194            compression,
195            is_zip64: use_zip64 || self.written > (u32::MAX as usize)
196        };
197        self.written += file.write(&mut self.inner, false)?;
198        self.files.push(file);
199        self.intermediate_digest = Some(CRC32.digest());
200        match compression {
201            CompressionMode::Store => self.intermediate_compressor = None,
202            CompressionMode::Deflate(level) => {
203                let mut compressor = CompressorOxide::default();
204                compressor.set_format_and_level(DataFormat::Raw, level);
205                self.intermediate_compressor = Some(compressor);
206            }
207        }
208        self.intermediate_uncompressed_size = 0;
209        self.intermediate_compressed_size = 0;
210
211        Ok(())
212    }
213
214    pub fn append_data(&mut self, content: &[u8]) -> Result<()> {
215        match self.intermediate_compressor {
216            Some(_) => self.append_data_deflate(content),
217            None => self.append_data_store(content),
218        }
219    }
220
221    pub fn finish_file(&mut self) -> Result<()> {
222        if self.intermediate_compressor.is_some() {
223            self.finish_data_deflate()?;
224            self.intermediate_compressor = None;
225        }
226        let digest = self.intermediate_digest.take().ok_or(Error::new(ErrorKind::InvalidData, "missing digest"))?;
227        let crc = digest.finalize();
228        let dd = DataDescriptor {
229            crc,
230            uncompressed_size: self.intermediate_uncompressed_size,
231            compressed_size: self.intermediate_compressed_size,
232        };
233        let file = self.files.last_mut().ok_or(Error::new(ErrorKind::InvalidData, "missing file header"))?;
234        self.written += dd.write(&mut self.inner, true, file.is_zip64, false)?;
235        file.data_descriptor = Some(dd);
236
237        Ok(())
238    }
239
240    fn append_data_deflate(&mut self, content: &[u8]) -> Result<()> {
241        let compressor = self.intermediate_compressor.as_mut().unwrap();
242        let digest = self.intermediate_digest.as_mut().ok_or(Error::new(ErrorKind::InvalidData, "missing digest"))?;
243        digest.update(content);
244        self.intermediate_uncompressed_size += content.len() as u64;
245
246        let mut in_buf = content;
247        loop {
248            let res = deflate(compressor, in_buf, &mut self.compressed_buf, MZFlush::None);
249            match res.status {
250                Ok(MZStatus::Ok) => (),
251                Ok(status) => return Err(Error::new(ErrorKind::Other, format!("deflate unexpected status: {:?}", status))),
252                Err(status) => return Err(Error::new(ErrorKind::Other, format!("deflate error: {:?}", status))),
253            }
254
255            self.intermediate_compressed_size += res.bytes_written as u64;
256            self.inner.write_all(&self.compressed_buf[..res.bytes_written])?;
257            self.written += res.bytes_written;
258            in_buf = &in_buf[res.bytes_consumed..];
259            if in_buf.len() == 0 { break; }
260        }
261
262        Ok(())
263    }
264
265    fn finish_data_deflate(&mut self) -> Result<()> {
266        loop {
267            let compressor = self.intermediate_compressor.as_mut().unwrap();
268            let res = deflate(compressor, &[], &mut self.compressed_buf, MZFlush::Finish);
269            let status = match res.status {
270                Ok(MZStatus::Ok) => MZStatus::Ok,
271                Ok(MZStatus::StreamEnd) => MZStatus::StreamEnd,
272                Ok(status) => return Err(Error::new(ErrorKind::Other, format!("deflate unexpected status: {:?}", status))),
273                Err(status) => return Err(Error::new(ErrorKind::Other, format!("deflate error: {:?}", status))),
274            };
275            self.intermediate_compressed_size += res.bytes_written as u64;
276            self.inner.write_all(&self.compressed_buf[..res.bytes_written])?;
277            self.written += res.bytes_written;
278            if let MZStatus::StreamEnd = status { break; }
279        }
280
281        Ok(())
282    }
283
284    fn append_data_store(&mut self, content: &[u8]) -> Result<()> {
285        let digest = self.intermediate_digest.as_mut().ok_or(Error::new(ErrorKind::InvalidData, "missing digest"))?;
286        digest.update(content);
287        self.intermediate_uncompressed_size += content.len() as u64;
288        self.intermediate_compressed_size += content.len() as u64;
289        self.inner.write_all(&content)?;
290        self.written += content.len();
291        Ok(())
292    }
293
294    pub fn add_file<R: Read>(&mut self, name: Vec<u8>, last_modified: NaiveDateTime, compression: CompressionMode, content: &mut R, use_zip64: bool) -> Result<()> {
295        self.start_new_file(name, last_modified, compression, use_zip64)?;
296        let mut buf = [0; 4096];
297        match compression {
298            CompressionMode::Store => {
299                while let Ok(bytes_read) = content.read(&mut buf) {
300                    if bytes_read == 0 { break; }
301                    self.append_data_store(&buf[..bytes_read])?;
302                }
303            },
304            CompressionMode::Deflate(_) => {
305                while let Ok(bytes_read) = content.read(&mut buf) {
306                    if bytes_read == 0 { break; }
307                    self.append_data_deflate(&buf[..bytes_read])?;
308                }
309            }
310        }
311        self.finish_file()?;
312
313        Ok(())
314    }
315
316    pub fn add_file_from_path<R: AsRef<Path>, S: AsRef<Path>>(&mut self, path: R, src_path: S, compression: CompressionMode, use_zip64: bool) -> Result<()> {
317        let mut file = File::open(src_path)?;
318        let modified = DateTime::<Utc>::from(file.metadata()?.modified()?).naive_local();
319        self.add_file(path.as_ref().to_path_buf().into_os_string().into_vec(), modified, compression, &mut file, use_zip64)?;
320        Ok(())
321    }
322
323    pub fn add_dir_all<R: AsRef<Path>, S: AsRef<Path>>(&mut self, path: R, src_path: S, compression: CompressionMode, use_zip64: bool) -> Result<()> {
324        let mut stack = vec![(src_path.as_ref().to_path_buf(), None)];
325        while let Some((src, modified_if_file)) = stack.pop() {
326            let dest = path.as_ref().join(src.strip_prefix(&src_path).unwrap());
327            match modified_if_file {
328                None => {
329                    for entry in std::fs::read_dir(&src)? {
330                        let entry = entry?;
331                        let file_type = entry.file_type()?;
332                        if !file_type.is_symlink() {
333                            let modified_if_file = match file_type.is_dir() {
334                                true => None,
335                                false => Some(DateTime::<Utc>::from(entry.metadata()?.modified()?).naive_local())
336                            };
337                            stack.push((entry.path(), modified_if_file));
338                        }
339                    }
340                },
341                Some(modified) => {
342                    self.add_file(dest.into_os_string().into_vec(), modified, compression, &mut File::open(src)?, use_zip64)?;
343                },
344            }
345        }
346        Ok(())
347    }
348
349    pub fn finish(mut self) -> Result<W> {
350        let mut is_zip64 = self.files.len() > u16::MAX.into();
351        let central_directory_start = self.written;
352        for file in &self.files {
353            self.written += file.write(&mut self.inner, true)?;
354            if file.is_zip64 {
355                is_zip64 = true
356            }
357        }
358        let central_directory_size = self.written - central_directory_start;
359
360        if is_zip64 {
361            self.inner.write_all(b"PK\x06\x06")?; // Zip64 end of central directory signature
362            self.inner.write_all(&44u64.to_le_bytes())?; // Size of EOCD64 minus 12
363            self.inner.write_all(&45u16.to_le_bytes())?; // Version made by
364            self.inner.write_all(&45u16.to_le_bytes())?; // Version needed to extract (minimum)
365            self.inner.write_all(&0u32.to_le_bytes())?; // Number of this disk
366            self.inner.write_all(&0u32.to_le_bytes())?; // Disk where central directory starts
367            self.inner.write_all(&(self.files.len() as u64).to_le_bytes())?; // Number of central directory records on this disk
368            self.inner.write_all(&(self.files.len() as u64).to_le_bytes())?; // Total number of central directory records
369            self.inner.write_all(&(central_directory_size as u64).to_le_bytes())?; // Size of central directory
370            self.inner.write_all(&(central_directory_start as u64).to_le_bytes())?; // Offset of start of central directory
371
372            self.inner.write_all(b"PK\x06\x07")?; // Zip64 end of central directory locator signature
373            self.inner.write_all(&0u32.to_le_bytes())?; // Number of the disk with the start of the Zip64 end of central directory record
374            self.inner.write_all(&(self.written as u64).to_le_bytes())?; // Relative offset of the Zip64 end of central directory record
375            self.inner.write_all(&1u32.to_le_bytes())?; // Total number of disks
376
377            self.inner.write_all(b"PK\x05\x06")?; // End of central directory signature
378            self.inner.write_all(&u16::MAX.to_le_bytes())?; // Number of this disk
379            self.inner.write_all(&u16::MAX.to_le_bytes())?; // Disk where central directory starts
380            if self.files.len() > (u16::MAX as usize) {
381                self.inner.write_all(&u16::MAX.to_le_bytes())?; // Number of central directory records on this disk
382                self.inner.write_all(&u16::MAX.to_le_bytes())?; // Total number of central directory records
383            } else {
384                self.inner.write_all(&(self.files.len() as u16).to_le_bytes())?; // Number of central directory records on this disk
385                self.inner.write_all(&(self.files.len() as u16).to_le_bytes())?; // Total number of central directory records
386            }
387            self.inner.write_all(&u32::MAX.to_le_bytes())?; // Size of central directory
388            self.inner.write_all(&u32::MAX.to_le_bytes())?; // Offset of start of central directory
389            self.inner.write_all(&0u16.to_le_bytes())?; // Comment length
390
391        } else {
392            self.inner.write_all(b"PK\x05\x06")?; // End of central directory signature
393            self.inner.write_all(&0u16.to_le_bytes())?; // Number of this disk
394            self.inner.write_all(&0u16.to_le_bytes())?; // Disk where central directory starts
395            self.inner.write_all(&(self.files.len() as u16).to_le_bytes())?; // Number of central directory records on this disk
396            self.inner.write_all(&(self.files.len() as u16).to_le_bytes())?; // Total number of central directory records
397            self.inner.write_all(&(central_directory_size as u32).to_le_bytes())?; // Size of central directory
398            self.inner.write_all(&(central_directory_start as u32).to_le_bytes())?; // Offset of start of central directory
399            self.inner.write_all(&0u16.to_le_bytes())?; // Comment length
400        }
401
402        Ok(self.inner)
403    }
404}