async_zip/base/read/
stream.rs

1// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
2// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
3
4//! A ZIP reader which acts over a non-seekable source.
5//!
6//! # API Design
//! As opposed to other readers provided by this crate, it's important that the data of an entry is fully read before
//! the next entry is read. This is a result of not being able to seek forwards or backwards, so we must end
//! up at the start of the next entry.
10//!
11//! **We encode this invariant within Rust's type system so that it can be enforced at compile time.**
12//!
13//! This requires that any transition methods between these encoded types consume the reader and provide a new owned
14//! reader back. This is certainly something to keep in mind when working with this reader, but idiomatic code can
15//! still be produced nevertheless.
16//!
17//! # Considerations
18//! As the central directory of a ZIP archive is stored at the end of it, a non-seekable reader doesn't have access
19//! to it. We have to rely on information provided within the local file header which may not be accurate or complete.
20//! This results in:
21//! - The inability to read ZIP entries using the combination of a data descriptor and the Stored compression method.
22//! - No file comment being available (defaults to an empty string).
23//! - No internal or external file attributes being available (defaults to 0).
24//! - The extra field data potentially being inconsistent with what's stored in the central directory.
25//! - None of the following being available when the entry was written with a data descriptor (defaults to 0):
26//!     - CRC
27//!     - compressed size
28//!     - uncompressed size
29//!
30//! # Example
31//! ```no_run
32//! # use futures_lite::io::Cursor;
33//! # use async_zip::error::Result;
34//! # use async_zip::base::read::stream::ZipFileReader;
35//! #
36//! # async fn run() -> Result<()> {
37//! let mut zip = ZipFileReader::new(Cursor::new([0; 0]));
38//!     
39//! // Print the name of every file in a ZIP archive.
40//! while let Some(entry) = zip.next_with_entry().await? {
41//!     println!("File: {}", entry.reader().entry().filename().as_str().unwrap());
42//!     (.., zip) = entry.skip().await?;
43//! }
44//! #
45//! #     Ok(())
46//! # }
47//! ```
48
49use crate::base::read::counting::Counting;
50use crate::base::read::io::entry::ZipEntryReader;
51use crate::error::Result;
52use crate::error::ZipError;
53use crate::spec::data_descriptor::{CombinedDataDescriptor, DataDescriptor, Zip64DataDescriptor};
54#[cfg(feature = "tokio")]
55use crate::tokio::read::stream::Ready as TokioReady;
56
57use futures_lite::io::AsyncBufRead;
58use futures_lite::io::AsyncReadExt;
59
60use super::io::entry::WithEntry;
61use super::io::entry::WithoutEntry;
62use crate::spec::header::HeaderId;
63#[cfg(feature = "tokio")]
64use tokio_util::compat::TokioAsyncReadCompatExt;
65
/// Type-state marker: the [`ZipFileReader`] sits between entries and may open the next one.
///
/// Wraps the underlying source `R` directly.
pub struct Ready<R>(R);
68
69/// A type which encodes that [`ZipFileReader`] is currently reading an entry.
70pub struct Reading<'a, R, E>(ZipEntryReader<'a, R, E>, Option<Suffix>);
71
/// The kind of trailer that follows an entry's data in the stream.
#[derive(Debug, Clone, Copy)]
enum Suffix {
    /// A data descriptor trails the entry.
    DataDescriptor,
    /// A ZIP64 data descriptor trails the entry.
    Zip64DataDescriptor,
}
79
/// A streaming ZIP reader for sources that cannot seek.
///
/// The type parameter `S` carries the reader's current state ([`Ready`] or [`Reading`]).
/// See the [module-level docs](.) for more information.
#[derive(Clone)]
pub struct ZipFileReader<S>(S);
85
86impl<'a, R> ZipFileReader<Ready<Counting<R>>>
87where
88    R: AsyncBufRead + Unpin + 'a,
89{
90    /// Constructs a new ZIP reader from a non-seekable source.
91    pub fn new(reader: R) -> Self {
92        Self(Ready(Counting::new(reader)))
93    }
94
95    /// Opens the next entry for reading if the central directory hasn’t yet been reached.
96    pub async fn next_without_entry(mut self) -> Result<Option<ZipFileReader<Reading<'a, Counting<R>, WithoutEntry>>>> {
97        let file_offset = self.0 .0.bytes_read();
98        let entry = match crate::base::read::lfh(&mut self.0 .0, file_offset).await? {
99            Some(entry) => entry,
100            None => return Ok(None),
101        };
102
103        let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size };
104        let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length);
105
106        let suffix = if entry.data_descriptor {
107            if entry.extra_fields.iter().any(|ef| ef.header_id() == HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD) {
108                Some(Suffix::Zip64DataDescriptor)
109            } else {
110                Some(Suffix::DataDescriptor)
111            }
112        } else {
113            None
114        };
115
116        Ok(Some(ZipFileReader(Reading(reader, suffix))))
117    }
118
119    /// Opens the next entry for reading if the central directory hasn’t yet been reached.
120    pub async fn next_with_entry(mut self) -> Result<Option<ZipFileReader<Reading<'a, Counting<R>, WithEntry<'a>>>>> {
121        let file_offset = self.0 .0.bytes_read();
122        let entry = match crate::base::read::lfh(&mut self.0 .0, file_offset).await? {
123            Some(entry) => entry,
124            None => return Ok(None),
125        };
126
127        let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size };
128        let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length);
129
130        let suffix = if entry.data_descriptor {
131            if entry.extra_fields.iter().any(|ef| ef.header_id() == HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD) {
132                Some(Suffix::Zip64DataDescriptor)
133            } else {
134                Some(Suffix::DataDescriptor)
135            }
136        } else {
137            None
138        };
139
140        Ok(Some(ZipFileReader(Reading(reader.into_with_entry_owned(entry), suffix))))
141    }
142
143    /// Consumes the `ZipFileReader` returning the original `reader`
144    pub async fn into_inner(self) -> R {
145        self.0 .0.into_inner()
146    }
147
148    /// Returns the file offset of the current reader.
149    pub fn offset(&self) -> u64 {
150        self.0 .0.bytes_read()
151    }
152}
153
#[cfg(feature = "tokio")]
impl<R> ZipFileReader<TokioReady<R>>
where
    R: tokio::io::AsyncBufRead + Unpin,
{
    /// Constructs a new tokio-specific ZIP reader from a non-seekable source.
    ///
    /// The tokio reader is adapted through the `tokio_util` compatibility layer before being wrapped.
    pub fn with_tokio(reader: R) -> ZipFileReader<TokioReady<R>> {
        let compat = reader.compat();
        ZipFileReader(Ready(compat))
    }
}
164
165type Next<R> = (Option<CombinedDataDescriptor>, ZipFileReader<Ready<R>>);
166
167impl<'a, R, E> ZipFileReader<Reading<'a, R, E>>
168where
169    R: AsyncBufRead + Unpin,
170{
171    /// Returns an immutable reference to the inner entry reader.
172    pub fn reader(&self) -> &ZipEntryReader<'a, R, E> {
173        &self.0 .0
174    }
175
176    /// Returns a mutable reference to the inner entry reader.
177    pub fn reader_mut(&mut self) -> &mut ZipEntryReader<'a, R, E> {
178        &mut self.0 .0
179    }
180
181    /// Converts the reader back into the Ready state if EOF has been reached.
182    pub async fn done(mut self) -> Result<Next<R>> {
183        if self.0 .0.read(&mut [0; 1]).await? != 0 {
184            return Err(ZipError::EOFNotReached);
185        }
186
187        let mut inner = self.0 .0.into_inner();
188
189        let data_descriptor = match self.0 .1 {
190            Some(Suffix::DataDescriptor) => {
191                Some(CombinedDataDescriptor::from(DataDescriptor::from_reader(&mut inner).await?))
192            }
193            Some(Suffix::Zip64DataDescriptor) => {
194                Some(CombinedDataDescriptor::from(Zip64DataDescriptor::from_reader(&mut inner).await?))
195            }
196            None => None,
197        };
198
199        let reader = ZipFileReader(Ready(inner));
200
201        Ok((data_descriptor, reader))
202    }
203
204    /// Reads until EOF and converts the reader back into the Ready state.
205    pub async fn skip(mut self) -> Result<Next<R>> {
206        while self.0 .0.read(&mut [0; 2048]).await? != 0 {}
207        let mut inner = self.0 .0.into_inner();
208
209        let data_descriptor = match self.0 .1 {
210            Some(Suffix::DataDescriptor) => {
211                Some(CombinedDataDescriptor::from(DataDescriptor::from_reader(&mut inner).await?))
212            }
213            Some(Suffix::Zip64DataDescriptor) => {
214                Some(CombinedDataDescriptor::from(Zip64DataDescriptor::from_reader(&mut inner).await?))
215            }
216            None => None,
217        };
218
219        let reader = ZipFileReader(Ready(inner));
220
221        Ok((data_descriptor, reader))
222    }
223}