s_zip/
writer.rs

1//! Streaming ZIP writer that compresses data on-the-fly without temp files
2//!
3//! This eliminates:
4//! - Temp file disk I/O
5//! - File read buffers
6//! - Intermediate storage
7//!
8//! Expected RAM savings: 5-8 MB per file
9
10use crate::error::{Result, SZipError};
11use crc32fast::Hasher as Crc32;
12use flate2::write::DeflateEncoder;
13use flate2::Compression;
14use std::fs::File;
15use std::io::{Seek, Write};
16use std::path::Path;
17
/// Metadata of a completed entry, retained until the central directory is written.
struct ZipEntry {
    /// Entry name exactly as it was written into the local file header.
    name: String,
    /// Byte position in the output file where the entry's local header begins.
    local_header_offset: u64,
    /// CRC-32 checksum of the entry's uncompressed data.
    crc32: u32,
    /// Size of the entry's deflated data in bytes.
    compressed_size: u32,
    /// Size of the entry's original (uncompressed) data in bytes.
    uncompressed_size: u32,
}
26
27/// Streaming ZIP writer that compresses data on-the-fly
28pub struct StreamingZipWriter {
29    output: File,
30    entries: Vec<ZipEntry>,
31    current_entry: Option<CurrentEntry>,
32    compression_level: u32,
33}
34
35struct CurrentEntry {
36    name: String,
37    local_header_offset: u64,
38    encoder: DeflateEncoder<CrcCountingWriter>,
39}
40
41/// Writer that counts bytes and computes CRC32 while writing to output
42struct CrcCountingWriter {
43    output: File,
44    crc: Crc32,
45    uncompressed_count: u64,
46    compressed_count: u64,
47}
48
49impl CrcCountingWriter {
50    fn new(output: File) -> Self {
51        Self {
52            output,
53            crc: Crc32::new(),
54            uncompressed_count: 0,
55            compressed_count: 0,
56        }
57    }
58}
59
60impl Write for CrcCountingWriter {
61    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
62        // This is the compressed data being written
63        let n = self.output.write(buf)?;
64        self.compressed_count += n as u64;
65        Ok(n)
66    }
67
68    fn flush(&mut self) -> std::io::Result<()> {
69        self.output.flush()
70    }
71}
72
73impl StreamingZipWriter {
74    /// Create a new ZIP writer with default compression level (6)
75    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
76        Self::with_compression(path, 6)
77    }
78
79    /// Create a new ZIP writer with custom compression level (0-9)
80    pub fn with_compression<P: AsRef<Path>>(path: P, compression_level: u32) -> Result<Self> {
81        let output = File::create(path)?;
82        Ok(Self {
83            output,
84            entries: Vec::new(),
85            current_entry: None,
86            compression_level: compression_level.min(9),
87        })
88    }
89
90    /// Start a new entry (file) in the ZIP
91    pub fn start_entry(&mut self, name: &str) -> Result<()> {
92        // Finish previous entry if any
93        self.finish_current_entry()?;
94
95        let local_header_offset = self.output.stream_position()?;
96
97        // Write local file header with data descriptor flag (bit 3)
98        self.output.write_all(&[0x50, 0x4b, 0x03, 0x04])?; // signature
99        self.output.write_all(&[20, 0])?; // version needed
100        self.output.write_all(&[8, 0])?; // general purpose bit flag (bit 3 set)
101        self.output.write_all(&[8, 0])?; // compression method = deflate
102        self.output.write_all(&[0, 0, 0, 0])?; // mod time/date
103        self.output.write_all(&0u32.to_le_bytes())?; // crc32 placeholder
104        self.output.write_all(&0u32.to_le_bytes())?; // compressed size placeholder
105        self.output.write_all(&0u32.to_le_bytes())?; // uncompressed size placeholder
106        self.output.write_all(&(name.len() as u16).to_le_bytes())?;
107        self.output.write_all(&0u16.to_le_bytes())?; // extra len
108        self.output.write_all(name.as_bytes())?;
109
110        // Create encoder for this entry
111        let counting_writer = CrcCountingWriter::new(self.output.try_clone()?);
112        let encoder =
113            DeflateEncoder::new(counting_writer, Compression::new(self.compression_level));
114
115        self.current_entry = Some(CurrentEntry {
116            name: name.to_string(),
117            local_header_offset,
118            encoder,
119        });
120
121        Ok(())
122    }
123
124    /// Write uncompressed data to current entry (will be compressed on-the-fly)
125    pub fn write_data(&mut self, data: &[u8]) -> Result<()> {
126        if let Some(ref mut entry) = self.current_entry {
127            // Update CRC with uncompressed data
128            entry.encoder.get_mut().crc.update(data);
129            entry.encoder.get_mut().uncompressed_count += data.len() as u64;
130
131            // Write to encoder (compresses and writes to output)
132            entry.encoder.write_all(data)?;
133            Ok(())
134        } else {
135            Err(SZipError::InvalidFormat("No entry started".to_string()))
136        }
137    }
138
139    /// Finish current entry and write data descriptor
140    fn finish_current_entry(&mut self) -> Result<()> {
141        if let Some(entry) = self.current_entry.take() {
142            // Finish compression
143            let counting_writer = entry.encoder.finish()?;
144
145            let crc = counting_writer.crc.finalize();
146            let compressed_size = counting_writer.compressed_count as u32;
147            let uncompressed_size = counting_writer.uncompressed_count as u32;
148
149            // Write data descriptor
150            self.output.write_all(&[0x50, 0x4b, 0x07, 0x08])?;
151            self.output.write_all(&crc.to_le_bytes())?;
152            self.output.write_all(&compressed_size.to_le_bytes())?;
153            self.output.write_all(&uncompressed_size.to_le_bytes())?;
154
155            // Save entry info for central directory
156            self.entries.push(ZipEntry {
157                name: entry.name,
158                local_header_offset: entry.local_header_offset,
159                crc32: crc,
160                compressed_size,
161                uncompressed_size,
162            });
163        }
164        Ok(())
165    }
166
167    /// Finish ZIP file (write central directory and close)
168    pub fn finish(mut self) -> Result<()> {
169        // Finish last entry
170        self.finish_current_entry()?;
171
172        let central_dir_offset = self.output.stream_position()?;
173
174        // Write central directory
175        for entry in &self.entries {
176            self.output.write_all(&[0x50, 0x4b, 0x01, 0x02])?; // central dir sig
177            self.output.write_all(&[20, 0])?; // version made by
178            self.output.write_all(&[20, 0])?; // version needed
179            self.output.write_all(&[8, 0])?; // general purpose bit flag (bit 3 set)
180            self.output.write_all(&[8, 0])?; // compression method
181            self.output.write_all(&[0, 0, 0, 0])?; // mod time/date
182            self.output.write_all(&entry.crc32.to_le_bytes())?;
183            self.output
184                .write_all(&entry.compressed_size.to_le_bytes())?;
185            self.output
186                .write_all(&entry.uncompressed_size.to_le_bytes())?;
187            self.output
188                .write_all(&(entry.name.len() as u16).to_le_bytes())?;
189            self.output.write_all(&0u16.to_le_bytes())?; // extra len
190            self.output.write_all(&0u16.to_le_bytes())?; // file comment len
191            self.output.write_all(&0u16.to_le_bytes())?; // disk number start
192            self.output.write_all(&0u16.to_le_bytes())?; // internal attrs
193            self.output.write_all(&0u32.to_le_bytes())?; // external attrs
194            self.output
195                .write_all(&(entry.local_header_offset as u32).to_le_bytes())?;
196            self.output.write_all(entry.name.as_bytes())?;
197        }
198
199        let central_dir_size = self.output.stream_position()? - central_dir_offset;
200
201        // Write end of central directory
202        self.output.write_all(&[0x50, 0x4b, 0x05, 0x06])?;
203        self.output.write_all(&0u16.to_le_bytes())?; // disk number
204        self.output.write_all(&0u16.to_le_bytes())?; // disk with central dir
205        self.output
206            .write_all(&(self.entries.len() as u16).to_le_bytes())?;
207        self.output
208            .write_all(&(self.entries.len() as u16).to_le_bytes())?;
209        self.output
210            .write_all(&(central_dir_size as u32).to_le_bytes())?;
211        self.output
212            .write_all(&(central_dir_offset as u32).to_le_bytes())?;
213        self.output.write_all(&0u16.to_le_bytes())?; // comment len
214
215        self.output.flush()?;
216        Ok(())
217    }
218}