rc_zip_sync/
read_zip.rs

1use rc_zip::{
2    error::Error,
3    fsm::{ArchiveFsm, FsmResult},
4    parse::Archive,
5};
6use rc_zip::{fsm::EntryFsm, parse::Entry};
7use tracing::trace;
8
9use crate::entry_reader::EntryReader;
10use crate::streaming_entry_reader::StreamingEntryReader;
11use std::{io::Read, ops::Deref};
12
/// A trait for reading something as a zip archive, when the caller has to
/// supply the size of the archive explicitly.
///
/// In this file it is implemented for every type that implements
/// [HasCursor].
///
/// See also [ReadZip].
pub trait ReadZipWithSize {
    /// The type of the file to read from.
    type File: HasCursor;

    /// Reads self as a zip archive.
    ///
    /// `size` is the size of the archive in bytes. On success, the returned
    /// [ArchiveHandle] borrows `self`.
    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, Self::File>, Error>;
}
23
/// A trait for reading something as a zip archive when we can tell size from
/// self.
///
/// The implementations in this file determine the size themselves and then
/// delegate to [ReadZipWithSize::read_zip_with_size].
///
/// See also [ReadZipWithSize].
pub trait ReadZip {
    /// The type of the file to read from.
    type File: HasCursor;

    /// Reads self as a zip archive.
    ///
    /// On success, the returned [ArchiveHandle] borrows `self`.
    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error>;
}
35
36struct CursorState<'a, F: HasCursor + 'a> {
37    cursor: <F as HasCursor>::Cursor<'a>,
38    offset: u64,
39}
40
41impl<'a, F: HasCursor + 'a> CursorState<'a, F> {
42    /// Constructs a cursor only _after_ doing a bounds check with `offset` and `size`
43    fn try_new(has_cursor: &'a F, offset: u64, size: u64) -> Result<Self, Error> {
44        if offset > size {
45            return Err(std::io::Error::other(format!(
46                "archive tried reading beyond zip archive end. {offset} goes beyond {size}"
47            ))
48            .into());
49        }
50        let cursor = has_cursor.cursor_at(offset);
51        Ok(Self { cursor, offset })
52    }
53}
54
55impl<F> ReadZipWithSize for F
56where
57    F: HasCursor,
58{
59    type File = F;
60
61    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, F>, Error> {
62        let mut cstate: Option<CursorState<'_, F>> = None;
63
64        let mut fsm = ArchiveFsm::new(size);
65        loop {
66            if let Some(offset) = fsm.wants_read() {
67                trace!(%offset, "read_zip_with_size: wants_read, space len = {}", fsm.space().len());
68
69                let mut cstate_next = match cstate.take() {
70                    // all good, re-using
71                    Some(cstate) if cstate.offset == offset => cstate,
72                    Some(cstate) => {
73                        trace!(%offset, %cstate.offset, "read_zip_with_size: making new cursor (had wrong offset)");
74                        CursorState::try_new(self, offset, size)?
75                    }
76                    None => {
77                        trace!(%offset, "read_zip_with_size: making new cursor (had none)");
78                        CursorState::try_new(self, offset, size)?
79                    }
80                };
81
82                match cstate_next.cursor.read(fsm.space()) {
83                    Ok(read_bytes) => {
84                        cstate_next.offset += read_bytes as u64;
85                        cstate = Some(cstate_next);
86
87                        trace!(%read_bytes, "read_zip_with_size: read");
88                        if read_bytes == 0 {
89                            return Err(Error::IO(std::io::ErrorKind::UnexpectedEof.into()));
90                        }
91                        fsm.fill(read_bytes);
92                    }
93                    Err(err) => return Err(Error::IO(err)),
94                }
95            }
96
97            fsm = match fsm.process()? {
98                FsmResult::Done(archive) => {
99                    trace!("read_zip_with_size: done");
100                    return Ok(ArchiveHandle {
101                        file: self,
102                        archive,
103                    });
104                }
105                FsmResult::Continue(fsm) => fsm,
106            }
107        }
108    }
109}
110
111impl ReadZip for &[u8] {
112    type File = Self;
113
114    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
115        self.read_zip_with_size(self.len() as u64)
116    }
117}
118
119impl ReadZip for Vec<u8> {
120    type File = Self;
121
122    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
123        self.read_zip_with_size(self.len() as u64)
124    }
125}
126
127/// A zip archive, read synchronously from a file or other I/O resource.
128///
129/// This only contains metadata for the archive and its entries. Separate
130/// readers can be created for arbitraries entries on-demand using
131/// [EntryHandle::reader].
132pub struct ArchiveHandle<'a, F>
133where
134    F: HasCursor,
135{
136    file: &'a F,
137    archive: Archive,
138}
139
140impl<F> Deref for ArchiveHandle<'_, F>
141where
142    F: HasCursor,
143{
144    type Target = Archive;
145
146    fn deref(&self) -> &Self::Target {
147        &self.archive
148    }
149}
150
151impl<F> ArchiveHandle<'_, F>
152where
153    F: HasCursor,
154{
155    /// Iterate over all files in this zip, read from the central directory.
156    pub fn entries(&self) -> impl Iterator<Item = EntryHandle<'_, F>> {
157        self.archive.entries().map(move |entry| EntryHandle {
158            file: self.file,
159            entry,
160        })
161    }
162
163    /// Attempts to look up an entry by name. This is usually a bad idea,
164    /// as names aren't necessarily normalized in zip archives.
165    pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<EntryHandle<'_, F>> {
166        self.archive
167            .entries()
168            .find(|&x| x.name == name.as_ref())
169            .map(|entry| EntryHandle {
170                file: self.file,
171                entry,
172            })
173    }
174}
175
/// A zip entry, read synchronously from a file or other I/O resource.
///
/// Pairs an [Entry] with the I/O resource it lives in, so that a reader for
/// the entry can be created with [EntryHandle::reader].
pub struct EntryHandle<'a, F> {
    /// The I/O resource the entry is read from.
    file: &'a F,
    /// The entry itself; exposed through `Deref`.
    entry: &'a Entry,
}
181
/// Lets an [EntryHandle] be used wherever a reference to the underlying
/// [Entry] is expected.
impl<F> Deref for EntryHandle<'_, F> {
    type Target = Entry;

    fn deref(&self) -> &Self::Target {
        self.entry
    }
}
189
190impl<'a, F> EntryHandle<'a, F>
191where
192    F: HasCursor,
193{
194    /// Returns a reader for the entry.
195    pub fn reader(&self) -> EntryReader<<F as HasCursor>::Cursor<'a>> {
196        EntryReader::new(self.entry, self.file.cursor_at(self.entry.header_offset))
197    }
198
199    /// Reads the entire entry into a vector.
200    pub fn bytes(&self) -> std::io::Result<Vec<u8>> {
201        let mut v = Vec::new();
202        self.reader().read_to_end(&mut v)?;
203        Ok(v)
204    }
205}
206
/// A sliceable I/O resource: we can ask for a [Read] at a given offset.
///
/// `cursor_at` takes `&self`, so obtaining a cursor needs only shared access
/// to the resource.
pub trait HasCursor {
    /// The type of [Read] returned by [HasCursor::cursor_at].
    ///
    /// It may borrow from the resource for `'a`.
    type Cursor<'a>: Read + 'a
    where
        Self: 'a;

    /// Returns a [Read] at the given offset.
    ///
    /// The signature has no way to report an error, so how an out-of-range
    /// `offset` is handled is up to each implementation.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_>;
}
217
218impl HasCursor for &[u8] {
219    type Cursor<'a>
220        = &'a [u8]
221    where
222        Self: 'a;
223
224    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
225        &self[offset.try_into().unwrap()..]
226    }
227}
228
229impl HasCursor for Vec<u8> {
230    type Cursor<'a>
231        = &'a [u8]
232    where
233        Self: 'a;
234
235    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
236        &self[offset.try_into().unwrap()..]
237    }
238}
239
// Only compiled when the `file` feature is enabled.
#[cfg(feature = "file")]
impl HasCursor for std::fs::File {
    type Cursor<'a>
        = positioned_io::Cursor<&'a std::fs::File>
    where
        Self: 'a;

    /// Returns a `positioned_io::Cursor` over a shared reference to the
    /// file, created with `offset` as its starting position.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        positioned_io::Cursor::new_pos(self, offset)
    }
}
251
#[cfg(feature = "file")]
impl ReadZip for std::fs::File {
    type File = Self;

    /// Reads the file as a zip archive, taking the archive size from the
    /// file's metadata.
    ///
    /// Fails if the metadata cannot be queried, or for any reason
    /// [ReadZipWithSize::read_zip_with_size] fails.
    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self>, Error> {
        self.read_zip_with_size(self.metadata()?.len())
    }
}
261
262/// Allows reading zip entries in a streaming fashion, without seeking,
263/// based only on local headers. THIS IS NOT RECOMMENDED, as correctly
264/// reading zip files requires reading the central directory (located at
265/// the end of the file).
266pub trait ReadZipStreaming<R>
267where
268    R: Read,
269{
270    /// Get the first zip entry from the stream as a [StreamingEntryReader].
271    ///
272    /// See the trait's documentation for why using this is
273    /// generally a bad idea: you might want to use [ReadZip] or
274    /// [ReadZipWithSize] instead.
275    fn stream_zip_entries_throwing_caution_to_the_wind(
276        self,
277    ) -> Result<StreamingEntryReader<R>, Error>;
278}
279
280impl<R> ReadZipStreaming<R> for R
281where
282    R: Read,
283{
284    fn stream_zip_entries_throwing_caution_to_the_wind(
285        mut self,
286    ) -> Result<StreamingEntryReader<Self>, Error> {
287        let mut fsm = EntryFsm::new(None, None);
288
289        loop {
290            if fsm.wants_read() {
291                let n = self.read(fsm.space())?;
292                trace!("read {} bytes into buf for first zip entry", n);
293                fsm.fill(n);
294            }
295
296            if let Some(entry) = fsm.process_till_header()? {
297                let entry = entry.clone();
298                return Ok(StreamingEntryReader::new(fsm, entry, self));
299            }
300        }
301    }
302}