Skip to main content

async_zip/base/write/
entry_stream.rs

1// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
2// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
3
4use crate::base::write::compressed_writer::CompressedAsyncWriter;
5use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut;
6use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut;
7use crate::base::write::io::offset::AsyncOffsetWriter;
8use crate::base::write::CentralDirectoryEntry;
9use crate::base::write::ZipFileWriter;
10use crate::entry::ZipEntry;
11use crate::error::{Result, Zip64ErrorCase, ZipError};
12use crate::spec::data_descriptor::{DataDescriptor, Zip64DataDescriptor};
13use crate::spec::extra_field::ExtraFieldAsBytes;
14use crate::spec::header::InfoZipUnicodeCommentExtraField;
15use crate::spec::header::InfoZipUnicodePathExtraField;
16use crate::spec::header::{
17    CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, LocalFileHeader, Zip64ExtendedInformationExtraField,
18};
19use crate::string::StringEncoding;
20
21use std::io::Error;
22use std::pin::Pin;
23use std::task::{Context, Poll};
24
25use crate::base::read::get_zip64_extra_field_mut;
26use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE};
27use crc32fast::Hasher;
28use futures_lite::io::{AsyncWrite, AsyncWriteExt};
29
/// An entry writer which supports the streaming of data (ie. the writing of unknown size or data at runtime).
///
/// # Note
/// - This writer cannot be manually constructed; instead, use [`ZipFileWriter::write_entry_stream()`].
/// - [`EntryStreamWriter::close()`] must be called before a stream writer goes out of scope.
/// - Utilities for working with [`AsyncWrite`] values are provided by [`AsyncWriteExt`].
pub struct EntryStreamWriter<'b, W: AsyncWrite + Unpin> {
    /// Compressing writer over the archive's writer, wrapped in an offset tracker whose offset is read
    /// back in `close()` as the entry's uncompressed size.
    writer: AsyncOffsetWriter<CompressedAsyncWriter<'b, W>>,
    /// The parent [`ZipFileWriter`]'s central directory entries; `close()` pushes this entry's record here.
    cd_entries: &'b mut Vec<CentralDirectoryEntry>,
    /// The entry being written, including any extra fields added while writing the local file header.
    entry: ZipEntry,
    /// Running CRC32 over every byte accepted by `poll_write`.
    hasher: Hasher,
    /// The local file header that was written for this entry; reused when building the central directory record.
    lfh: LocalFileHeader,
    /// Offset of the underlying writer sampled just before the local file header was written.
    lfh_offset: u64,
    /// Offset of the underlying writer sampled just after the local file header was written; subtracted
    /// from the final offset in `close()` to obtain the compressed size.
    data_offset: u64,
    /// Copied from the parent [`ZipFileWriter`]; when set, sizes and offsets must fit the non-zip64 fields.
    force_no_zip64: bool,
    /// To write back to the original writer if zip64 is required.
    is_zip64: &'b mut bool,
}
48
49impl<'b, W: AsyncWrite + Unpin> EntryStreamWriter<'b, W> {
50    pub(crate) async fn from_raw(
51        writer: &'b mut ZipFileWriter<W>,
52        mut entry: ZipEntry,
53    ) -> Result<EntryStreamWriter<'b, W>> {
54        if writer.force_no_zip64 && writer.cd_entries.len() >= NON_ZIP64_MAX_NUM_FILES as usize {
55            return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles));
56        }
57
58        #[cfg(feature = "deflate64")]
59        if matches!(entry.compression(), crate::Compression::Deflate64) {
60            return Err(ZipError::FeatureNotSupported("Deflate64 writing"));
61        }
62
63        let lfh_offset = writer.writer.offset();
64        let lfh = EntryStreamWriter::write_lfh(writer, &mut entry).await?;
65        let data_offset = writer.writer.offset();
66        let force_no_zip64 = writer.force_no_zip64;
67
68        let cd_entries = &mut writer.cd_entries;
69        let is_zip64 = &mut writer.is_zip64;
70        let writer = AsyncOffsetWriter::new(CompressedAsyncWriter::from_raw(&mut writer.writer, entry.compression())?);
71
72        Ok(EntryStreamWriter {
73            writer,
74            cd_entries,
75            entry,
76            lfh,
77            lfh_offset,
78            data_offset,
79            hasher: Hasher::new(),
80            force_no_zip64,
81            is_zip64,
82        })
83    }
84
85    async fn write_lfh(writer: &'b mut ZipFileWriter<W>, entry: &mut ZipEntry) -> Result<LocalFileHeader> {
86        // Always emit a zip64 extended field, even if we don't need it, because we *might* need it.
87        // If we are forcing no zip, we will have to error later if the file is too large.
88        let (lfh_compressed, lfh_uncompressed) = if !writer.force_no_zip64 {
89            if !writer.is_zip64 {
90                writer.is_zip64 = true;
91            }
92            entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(Zip64ExtendedInformationExtraField {
93                uncompressed_size: Some(entry.uncompressed_size),
94                compressed_size: Some(entry.compressed_size),
95                relative_header_offset: None,
96                disk_start_number: None,
97            }));
98
99            (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE)
100        } else {
101            if entry.compressed_size > NON_ZIP64_MAX_SIZE as u64 || entry.uncompressed_size > NON_ZIP64_MAX_SIZE as u64
102            {
103                return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
104            }
105
106            (entry.compressed_size as u32, entry.uncompressed_size as u32)
107        };
108
109        let utf8_without_alternative =
110            entry.filename().is_utf8_without_alternative() && entry.comment().is_utf8_without_alternative();
111        if !utf8_without_alternative {
112            if matches!(entry.filename().encoding(), StringEncoding::Utf8) {
113                let u_file_name = entry.filename().as_bytes().to_vec();
114                if !u_file_name.is_empty() {
115                    let basic_crc32 =
116                        crc32fast::hash(entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes()));
117                    let upath_field = get_or_put_info_zip_unicode_path_extra_field_mut(entry.extra_fields.as_mut());
118                    if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field {
119                        *crc32 = basic_crc32;
120                        *unicode = u_file_name;
121                    }
122                }
123            }
124            if matches!(entry.comment().encoding(), StringEncoding::Utf8) {
125                let u_comment = entry.comment().as_bytes().to_vec();
126                if !u_comment.is_empty() {
127                    let basic_crc32 =
128                        crc32fast::hash(entry.comment().alternative().unwrap_or_else(|| entry.comment().as_bytes()));
129                    let ucom_field = get_or_put_info_zip_unicode_comment_extra_field_mut(entry.extra_fields.as_mut());
130                    if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field {
131                        *crc32 = basic_crc32;
132                        *unicode = u_comment;
133                    }
134                }
135            }
136        }
137
138        let filename_basic = entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes());
139
140        let lfh = LocalFileHeader {
141            compressed_size: lfh_compressed,
142            uncompressed_size: lfh_uncompressed,
143            compression: entry.compression().into(),
144            crc: entry.crc32,
145            extra_field_length: entry
146                .extra_fields()
147                .count_bytes()
148                .try_into()
149                .map_err(|_| ZipError::ExtraFieldTooLarge)?,
150            file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?,
151            mod_time: entry.last_modification_date().time,
152            mod_date: entry.last_modification_date().date,
153            version: crate::spec::version::as_needed_to_extract(entry),
154            flags: GeneralPurposeFlag {
155                data_descriptor: true,
156                encrypted: false,
157                filename_unicode: utf8_without_alternative,
158            },
159        };
160
161        writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?;
162        writer.writer.write_all(&lfh.as_slice()).await?;
163        writer.writer.write_all(filename_basic).await?;
164        writer.writer.write_all(&entry.extra_fields().as_bytes()).await?;
165
166        Ok(lfh)
167    }
168
169    /// Consumes this entry writer and completes all closing tasks.
170    ///
171    /// This includes:
172    /// - Finalising the CRC32 hash value for the written data.
173    /// - Calculating the compressed and uncompressed byte sizes.
174    /// - Constructing a central directory header.
175    /// - Pushing that central directory header to the [`ZipFileWriter`]'s store.
176    ///
177    /// Failure to call this function before going out of scope would result in a corrupted ZIP file.
178    pub async fn close(mut self) -> Result<()> {
179        self.writer.close().await?;
180
181        let crc = self.hasher.finalize();
182        let uncompressed_size = self.writer.offset();
183        let inner_writer = self.writer.into_inner().into_inner();
184        let compressed_size = inner_writer.offset() - self.data_offset;
185
186        let (cdr_compressed_size, cdr_uncompressed_size, lh_offset) = if self.force_no_zip64 {
187            if uncompressed_size > NON_ZIP64_MAX_SIZE as u64
188                || compressed_size > NON_ZIP64_MAX_SIZE as u64
189                || self.lfh_offset > NON_ZIP64_MAX_SIZE as u64
190            {
191                return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
192            }
193            (uncompressed_size as u32, compressed_size as u32, self.lfh_offset as u32)
194        } else {
195            // When streaming an entry, we are always using a zip64 field.
196            match get_zip64_extra_field_mut(&mut self.entry.extra_fields) {
197                // This case shouldn't be necessary but is included for completeness.
198                None => {
199                    self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(
200                        Zip64ExtendedInformationExtraField {
201                            uncompressed_size: Some(uncompressed_size),
202                            compressed_size: Some(compressed_size),
203                            relative_header_offset: Some(self.lfh_offset),
204                            disk_start_number: None,
205                        },
206                    ));
207                }
208                Some(zip64) => {
209                    zip64.uncompressed_size = Some(uncompressed_size);
210                    zip64.compressed_size = Some(compressed_size);
211                    zip64.relative_header_offset = Some(self.lfh_offset);
212                }
213            }
214            self.lfh.extra_field_length =
215                self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?;
216
217            (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE)
218        };
219
220        inner_writer.write_all(&crate::spec::consts::DATA_DESCRIPTOR_SIGNATURE.to_le_bytes()).await?;
221        if self.force_no_zip64 {
222            let descriptor =
223                DataDescriptor { crc, compressed_size: cdr_compressed_size, uncompressed_size: cdr_uncompressed_size };
224            inner_writer.write_all(&descriptor.as_bytes()).await?;
225        } else {
226            let descriptor = Zip64DataDescriptor { crc, compressed_size, uncompressed_size };
227            inner_writer.write_all(&descriptor.as_bytes()).await?;
228        }
229
230        let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes());
231
232        let cdh = CentralDirectoryRecord {
233            compressed_size: cdr_compressed_size,
234            uncompressed_size: cdr_uncompressed_size,
235            crc,
236            v_made_by: crate::spec::version::as_made_by(),
237            v_needed: self.lfh.version,
238            compression: self.lfh.compression,
239            extra_field_length: self.lfh.extra_field_length,
240            file_name_length: self.lfh.file_name_length,
241            file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?,
242            mod_time: self.lfh.mod_time,
243            mod_date: self.lfh.mod_date,
244            flags: self.lfh.flags,
245            disk_start: 0,
246            inter_attr: self.entry.internal_file_attribute(),
247            exter_attr: self.entry.external_file_attribute(),
248            lh_offset,
249        };
250
251        self.cd_entries.push(CentralDirectoryEntry { header: cdh, entry: self.entry });
252        // Mark the archive as Zip64 once the central directory no longer fits in the legacy count field.
253        if self.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize && !*self.is_zip64 {
254            *self.is_zip64 = true;
255        }
256
257        Ok(())
258    }
259}
260
261impl<'a, W: AsyncWrite + Unpin> AsyncWrite for EntryStreamWriter<'a, W> {
262    fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<std::result::Result<usize, Error>> {
263        let poll = Pin::new(&mut self.writer).poll_write(cx, buf);
264
265        if let Poll::Ready(Ok(written)) = poll {
266            self.hasher.update(&buf[0..written]);
267        }
268
269        poll
270    }
271
272    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
273        Pin::new(&mut self.writer).poll_flush(cx)
274    }
275
276    fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
277        Pin::new(&mut self.writer).poll_close(cx)
278    }
279}