Skip to main content

async_zip/base/write/
entry_seekable.rs

1// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
2
3use crate::base::read::get_zip64_extra_field_mut;
4use crate::base::write::compressed_writer::CompressedAsyncWriter;
5use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut;
6use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut;
7use crate::base::write::io::offset::AsyncOffsetWriter;
8use crate::base::write::{CentralDirectoryEntry, ZipFileWriter};
9use crate::entry::ZipEntry;
10use crate::error::{Result, Zip64ErrorCase, ZipError};
11use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE};
12use crate::spec::extra_field::ExtraFieldAsBytes;
13use crate::spec::header::{
14    CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, InfoZipUnicodeCommentExtraField,
15    InfoZipUnicodePathExtraField, LocalFileHeader, Zip64ExtendedInformationExtraField,
16};
17use crate::StringEncoding;
18
19use crc32fast::Hasher;
20use futures_lite::io::{AsyncSeek, AsyncSeekExt, AsyncWrite, AsyncWriteExt, SeekFrom};
21use std::io::Error;
22use std::pin::Pin;
23use std::task::{Context, Poll};
24
/// Minimum "version needed to extract" value (ZIP specification 4.5, stored as `45`) that an
/// entry's headers are raised to once the entry relies on Zip64 metadata.
const ZIP64_VERSION_NEEDED: u16 = 45;
26
27/// An entry writer which streams data to a seekable ZIP output.
28///
29/// Unlike [`EntryStreamWriter`](crate::base::write::EntryStreamWriter), this writer doesn't use
30/// data descriptors. Instead, it writes a placeholder local file header, streams the entry data,
31/// then seeks back and patches the header with the final CRC and sizes.
32///
33/// If the final compressed or uncompressed size requires Zip64 but no Zip64 size fields were
34/// reserved up front, closing this writer will fail. Use [`ZipEntryBuilder::size`] to reserve those
35/// fields when the size is known to exceed the non-Zip64 limit, or use
36/// [`ZipFileWriter::write_entry_stream`] for fully unknown Zip64-sized entries.
37///
38/// [`ZipEntryBuilder::size`]: crate::ZipEntryBuilder::size
39pub struct EntrySeekableWriter<'b, W: AsyncWrite + AsyncSeek + Unpin> {
40    writer: AsyncOffsetWriter<CompressedAsyncWriter<'b, W>>,
41    cd_entries: &'b mut Vec<CentralDirectoryEntry>,
42    entry: ZipEntry,
43    hasher: Hasher,
44    lfh: LocalFileHeader,
45    lfh_offset: u64,
46    data_offset: u64,
47    local_header_has_zip64_sizes: bool,
48    force_no_zip64: bool,
49    /// To write back to the original writer if Zip64 is required.
50    is_zip64: &'b mut bool,
51}
52
53impl<'b, W: AsyncWrite + AsyncSeek + Unpin> EntrySeekableWriter<'b, W> {
54    pub(crate) async fn from_raw(
55        writer: &'b mut ZipFileWriter<W>,
56        mut entry: ZipEntry,
57    ) -> Result<EntrySeekableWriter<'b, W>> {
58        if writer.force_no_zip64 && writer.cd_entries.len() >= NON_ZIP64_MAX_NUM_FILES as usize {
59            return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles));
60        }
61
62        #[cfg(feature = "deflate64")]
63        if matches!(entry.compression(), crate::Compression::Deflate64) {
64            return Err(ZipError::FeatureNotSupported("Deflate64 writing"));
65        }
66
67        let lfh_offset = writer.writer.offset();
68        let (lfh, local_header_has_zip64_sizes) = EntrySeekableWriter::write_lfh(writer, &mut entry).await?;
69        let data_offset = writer.writer.offset();
70        let force_no_zip64 = writer.force_no_zip64;
71
72        let cd_entries = &mut writer.cd_entries;
73        let is_zip64 = &mut writer.is_zip64;
74        let writer = AsyncOffsetWriter::new(CompressedAsyncWriter::from_raw(&mut writer.writer, entry.compression())?);
75
76        Ok(EntrySeekableWriter {
77            writer,
78            cd_entries,
79            entry,
80            lfh,
81            lfh_offset,
82            data_offset,
83            local_header_has_zip64_sizes,
84            hasher: Hasher::new(),
85            force_no_zip64,
86            is_zip64,
87        })
88    }
89
90    async fn write_lfh(writer: &'b mut ZipFileWriter<W>, entry: &mut ZipEntry) -> Result<(LocalFileHeader, bool)> {
91        let local_header_has_zip64_sizes =
92            entry.uncompressed_size >= NON_ZIP64_MAX_SIZE as u64 || entry.compressed_size >= NON_ZIP64_MAX_SIZE as u64;
93        if local_header_has_zip64_sizes {
94            if writer.force_no_zip64 {
95                return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
96            }
97            if !writer.is_zip64 {
98                writer.is_zip64 = true;
99            }
100            // Reserve Zip64 size slots up front so the later header patch stays the same width.
101            match get_zip64_extra_field_mut(&mut entry.extra_fields) {
102                Some(zip64) => {
103                    zip64.uncompressed_size = Some(entry.uncompressed_size);
104                    zip64.compressed_size = Some(entry.compressed_size);
105                }
106                None => {
107                    entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(Zip64ExtendedInformationExtraField {
108                        uncompressed_size: Some(entry.uncompressed_size),
109                        compressed_size: Some(entry.compressed_size),
110                        relative_header_offset: None,
111                        disk_start_number: None,
112                    }));
113                }
114            }
115        }
116
117        let utf8_without_alternative =
118            entry.filename().is_utf8_without_alternative() && entry.comment().is_utf8_without_alternative();
119        if !utf8_without_alternative {
120            if matches!(entry.filename().encoding(), StringEncoding::Utf8) {
121                let u_file_name = entry.filename().as_bytes().to_vec();
122                if !u_file_name.is_empty() {
123                    let basic_crc32 =
124                        crc32fast::hash(entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes()));
125                    let upath_field = get_or_put_info_zip_unicode_path_extra_field_mut(entry.extra_fields.as_mut());
126                    if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field {
127                        *crc32 = basic_crc32;
128                        *unicode = u_file_name;
129                    }
130                }
131            }
132            if matches!(entry.comment().encoding(), StringEncoding::Utf8) {
133                let u_comment = entry.comment().as_bytes().to_vec();
134                if !u_comment.is_empty() {
135                    let basic_crc32 =
136                        crc32fast::hash(entry.comment().alternative().unwrap_or_else(|| entry.comment().as_bytes()));
137                    let ucom_field = get_or_put_info_zip_unicode_comment_extra_field_mut(entry.extra_fields.as_mut());
138                    if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field {
139                        *crc32 = basic_crc32;
140                        *unicode = u_comment;
141                    }
142                }
143            }
144        }
145
146        let filename_basic = entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes());
147
148        let lfh = LocalFileHeader {
149            compressed_size: if local_header_has_zip64_sizes { NON_ZIP64_MAX_SIZE } else { 0 },
150            uncompressed_size: if local_header_has_zip64_sizes { NON_ZIP64_MAX_SIZE } else { 0 },
151            compression: entry.compression().into(),
152            crc: entry.crc32,
153            extra_field_length: entry
154                .extra_fields()
155                .count_bytes()
156                .try_into()
157                .map_err(|_| ZipError::ExtraFieldTooLarge)?,
158            file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?,
159            mod_time: entry.last_modification_date().time,
160            mod_date: entry.last_modification_date().date,
161            version: crate::spec::version::as_needed_to_extract(entry),
162            flags: GeneralPurposeFlag {
163                data_descriptor: false,
164                encrypted: false,
165                filename_unicode: utf8_without_alternative,
166            },
167        };
168
169        writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?;
170        writer.writer.write_all(&lfh.as_slice()).await?;
171        writer.writer.write_all(filename_basic).await?;
172        writer.writer.write_all(&entry.extra_fields().as_bytes()).await?;
173
174        Ok((lfh, local_header_has_zip64_sizes))
175    }
176
177    /// Consumes this entry writer and completes all closing tasks.
178    ///
179    /// This includes:
180    /// - Finalising the CRC32 hash value for the written data.
181    /// - Calculating the compressed and uncompressed byte sizes.
182    /// - Seeking back to patch the local file header.
183    /// - Constructing a central directory header.
184    /// - Pushing that central directory header to the [`ZipFileWriter`]'s store.
185    ///
186    /// Failure to call this function before going out of scope would result in a corrupted ZIP file.
187    pub async fn close(mut self) -> Result<()> {
188        self.writer.close().await?;
189
190        let crc = self.hasher.finalize();
191        let uncompressed_size = self.writer.offset();
192        let inner_writer = self.writer.into_inner().into_inner();
193        let compressed_size = inner_writer.offset() - self.data_offset;
194        let end_offset = inner_writer.offset();
195
196        let requires_zip64_sizes =
197            uncompressed_size >= NON_ZIP64_MAX_SIZE as u64 || compressed_size >= NON_ZIP64_MAX_SIZE as u64;
198        let requires_zip64_offset = self.lfh_offset >= NON_ZIP64_MAX_SIZE as u64;
199
200        if self.force_no_zip64 && (requires_zip64_sizes || requires_zip64_offset) {
201            return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
202        }
203        if requires_zip64_sizes && !self.local_header_has_zip64_sizes {
204            return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
205        }
206
207        let uses_zip64_sizes = requires_zip64_sizes || self.local_header_has_zip64_sizes;
208        let uses_zip64_metadata = uses_zip64_sizes || requires_zip64_offset;
209        if uses_zip64_sizes {
210            if !*self.is_zip64 {
211                *self.is_zip64 = true;
212            }
213            self.lfh.compressed_size = NON_ZIP64_MAX_SIZE;
214            self.lfh.uncompressed_size = NON_ZIP64_MAX_SIZE;
215            match get_zip64_extra_field_mut(&mut self.entry.extra_fields) {
216                Some(zip64) => {
217                    zip64.uncompressed_size = Some(uncompressed_size);
218                    zip64.compressed_size = Some(compressed_size);
219                }
220                None => {
221                    self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(
222                        Zip64ExtendedInformationExtraField {
223                            uncompressed_size: Some(uncompressed_size),
224                            compressed_size: Some(compressed_size),
225                            relative_header_offset: None,
226                            disk_start_number: None,
227                        },
228                    ));
229                }
230            }
231        } else {
232            self.lfh.compressed_size = compressed_size as u32;
233            self.lfh.uncompressed_size = uncompressed_size as u32;
234        }
235        if uses_zip64_metadata {
236            self.lfh.version = self.lfh.version.max(ZIP64_VERSION_NEEDED);
237        }
238        self.lfh.crc = crc;
239        self.lfh.extra_field_length =
240            self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?;
241
242        let filename_basic = self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes());
243        let local_extra_fields = self.entry.extra_fields().as_bytes();
244
245        inner_writer.seek(SeekFrom::Start(self.lfh_offset + crate::spec::consts::SIGNATURE_LENGTH as u64)).await?;
246        inner_writer.write_all(&self.lfh.as_slice()).await?;
247        inner_writer.write_all(filename_basic).await?;
248        inner_writer.write_all(&local_extra_fields).await?;
249        inner_writer.seek(SeekFrom::Start(end_offset)).await?;
250
251        let lh_offset = if requires_zip64_offset {
252            if !*self.is_zip64 {
253                *self.is_zip64 = true;
254            }
255            match get_zip64_extra_field_mut(&mut self.entry.extra_fields) {
256                Some(zip64) => {
257                    zip64.relative_header_offset = Some(self.lfh_offset);
258                }
259                None => {
260                    self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(
261                        Zip64ExtendedInformationExtraField {
262                            uncompressed_size: None,
263                            compressed_size: None,
264                            relative_header_offset: Some(self.lfh_offset),
265                            disk_start_number: None,
266                        },
267                    ));
268                }
269            }
270            NON_ZIP64_MAX_SIZE
271        } else {
272            self.lfh_offset as u32
273        };
274
275        let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes());
276
277        let cdh = CentralDirectoryRecord {
278            compressed_size: self.lfh.compressed_size,
279            uncompressed_size: self.lfh.uncompressed_size,
280            crc,
281            v_made_by: crate::spec::version::as_made_by(),
282            v_needed: self.lfh.version,
283            compression: self.lfh.compression,
284            extra_field_length: self
285                .entry
286                .extra_fields()
287                .count_bytes()
288                .try_into()
289                .map_err(|_| ZipError::ExtraFieldTooLarge)?,
290            file_name_length: self.lfh.file_name_length,
291            file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?,
292            mod_time: self.lfh.mod_time,
293            mod_date: self.lfh.mod_date,
294            flags: self.lfh.flags,
295            disk_start: 0,
296            inter_attr: self.entry.internal_file_attribute(),
297            exter_attr: self.entry.external_file_attribute(),
298            lh_offset,
299        };
300
301        self.cd_entries.push(CentralDirectoryEntry { header: cdh, entry: self.entry });
302        // Mark the archive as Zip64 once the central directory no longer fits in the legacy count field.
303        if self.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize && !*self.is_zip64 {
304            *self.is_zip64 = true;
305        }
306
307        Ok(())
308    }
309}
310
311impl<'a, W: AsyncWrite + AsyncSeek + Unpin> AsyncWrite for EntrySeekableWriter<'a, W> {
312    fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<std::result::Result<usize, Error>> {
313        let poll = Pin::new(&mut self.writer).poll_write(cx, buf);
314
315        if let Poll::Ready(Ok(written)) = poll {
316            self.hasher.update(&buf[0..written]);
317        }
318
319        poll
320    }
321
322    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
323        Pin::new(&mut self.writer).poll_flush(cx)
324    }
325
326    fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
327        Pin::new(&mut self.writer).poll_close(cx)
328    }
329}