rc_zip_sync/
read_zip.rs

1use rc_zip::{
2    fsm::{ArchiveFsm, EntryFsm, FsmResult},
3    Archive, Entry, Error,
4};
5use tracing::trace;
6
7use crate::entry_reader::EntryReader;
8use crate::streaming_entry_reader::StreamingEntryReader;
9use std::{io::Read, ops::Deref};
10
11/// A trait for reading something as a zip archive
12///
13/// See also [ReadZip].
14pub trait ReadZipWithSize {
15    /// The type of the file to read from.
16    type File: HasCursor;
17
18    /// Reads self as a zip archive.
19    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, Self::File>, Error>;
20}
21
22/// A trait for reading something as a zip archive when we can tell size from
23/// self.
24///
25/// See also [ReadZipWithSize].
26pub trait ReadZip {
27    /// The type of the file to read from.
28    type File: HasCursor;
29
30    /// Reads self as a zip archive.
31    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error>;
32}
33
34struct CursorState<'a, F: HasCursor + 'a> {
35    cursor: <F as HasCursor>::Cursor<'a>,
36    offset: u64,
37}
38
39impl<'a, F: HasCursor + 'a> CursorState<'a, F> {
40    /// Constructs a cursor only _after_ doing a bounds check with `offset` and `size`
41    fn try_new(has_cursor: &'a F, offset: u64, size: u64) -> Result<Self, Error> {
42        if offset > size {
43            return Err(std::io::Error::other(format!(
44                "archive tried reading beyond zip archive end. {offset} goes beyond {size}"
45            ))
46            .into());
47        }
48        let cursor = has_cursor.cursor_at(offset);
49        Ok(Self { cursor, offset })
50    }
51}
52
53impl<F> ReadZipWithSize for F
54where
55    F: HasCursor,
56{
57    type File = F;
58
59    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, F>, Error> {
60        let mut cstate: Option<CursorState<'_, F>> = None;
61
62        let mut fsm = ArchiveFsm::new(size);
63        loop {
64            if let Some(offset) = fsm.wants_read() {
65                trace!(%offset, "read_zip_with_size: wants_read, space len = {}", fsm.space().len());
66
67                let mut cstate_next = match cstate.take() {
68                    // all good, re-using
69                    Some(cstate) if cstate.offset == offset => cstate,
70                    Some(cstate) => {
71                        trace!(%offset, %cstate.offset, "read_zip_with_size: making new cursor (had wrong offset)");
72                        CursorState::try_new(self, offset, size)?
73                    }
74                    None => {
75                        trace!(%offset, "read_zip_with_size: making new cursor (had none)");
76                        CursorState::try_new(self, offset, size)?
77                    }
78                };
79
80                match cstate_next.cursor.read(fsm.space()) {
81                    Ok(read_bytes) => {
82                        cstate_next.offset += read_bytes as u64;
83                        cstate = Some(cstate_next);
84
85                        trace!(%read_bytes, "read_zip_with_size: read");
86                        if read_bytes == 0 {
87                            return Err(Error::IO(std::io::ErrorKind::UnexpectedEof.into()));
88                        }
89                        fsm.fill(read_bytes);
90                    }
91                    Err(err) => return Err(Error::IO(err)),
92                }
93            }
94
95            fsm = match fsm.process()? {
96                FsmResult::Done(archive) => {
97                    trace!("read_zip_with_size: done");
98                    return Ok(ArchiveHandle {
99                        file: self,
100                        archive,
101                    });
102                }
103                FsmResult::Continue(fsm) => fsm,
104            }
105        }
106    }
107}
108
109impl ReadZip for &[u8] {
110    type File = Self;
111
112    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
113        self.read_zip_with_size(self.len() as u64)
114    }
115}
116
117impl ReadZip for Vec<u8> {
118    type File = Self;
119
120    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
121        self.read_zip_with_size(self.len() as u64)
122    }
123}
124
125/// A zip archive, read synchronously from a file or other I/O resource.
126///
127/// This only contains metadata for the archive and its entries. Separate
128/// readers can be created for arbitraries entries on-demand using
129/// [EntryHandle::reader].
130pub struct ArchiveHandle<'a, F>
131where
132    F: HasCursor,
133{
134    file: &'a F,
135    archive: Archive,
136}
137
138impl<F> Deref for ArchiveHandle<'_, F>
139where
140    F: HasCursor,
141{
142    type Target = Archive;
143
144    fn deref(&self) -> &Self::Target {
145        &self.archive
146    }
147}
148
149impl<F> ArchiveHandle<'_, F>
150where
151    F: HasCursor,
152{
153    /// Iterate over all files in this zip, read from the central directory.
154    pub fn entries(&self) -> impl Iterator<Item = EntryHandle<'_, F>> {
155        self.archive.entries().map(move |entry| EntryHandle {
156            file: self.file,
157            entry,
158        })
159    }
160
161    /// Attempts to look up an entry by name. This is usually a bad idea,
162    /// as names aren't necessarily normalized in zip archives.
163    pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<EntryHandle<'_, F>> {
164        self.archive
165            .entries()
166            .find(|&x| x.name == name.as_ref())
167            .map(|entry| EntryHandle {
168                file: self.file,
169                entry,
170            })
171    }
172}
173
174/// A zip entry, read synchronously from a file or other I/O resource.
175pub struct EntryHandle<'a, F> {
176    file: &'a F,
177    entry: &'a Entry,
178}
179
180impl<F> Deref for EntryHandle<'_, F> {
181    type Target = Entry;
182
183    fn deref(&self) -> &Self::Target {
184        self.entry
185    }
186}
187
188impl<'a, F> EntryHandle<'a, F>
189where
190    F: HasCursor,
191{
192    /// Returns a reader for the entry.
193    pub fn reader(&self) -> EntryReader<<F as HasCursor>::Cursor<'a>> {
194        EntryReader::new(self.entry, self.file.cursor_at(self.entry.header_offset))
195    }
196
197    /// Reads the entire entry into a vector.
198    pub fn bytes(&self) -> std::io::Result<Vec<u8>> {
199        let mut v = Vec::new();
200        self.reader().read_to_end(&mut v)?;
201        Ok(v)
202    }
203}
204
/// A sliceable I/O resource: we can ask for a [Read] at a given offset.
pub trait HasCursor {
    /// The type of [Read] returned by [HasCursor::cursor_at].
    type Cursor<'a>: Read + 'a
    where
        Self: 'a;

    /// Returns a [Read] at the given offset.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_>;
}

impl HasCursor for &[u8] {
    type Cursor<'a>
        = &'a [u8]
    where
        Self: 'a;

    /// Returns the part of the slice that starts at `offset`.
    ///
    /// An `offset` past the end of the slice (or one that does not fit in
    /// `usize`) yields an empty cursor instead of panicking, so reading from
    /// it simply returns zero bytes.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        slice_from_offset(self, offset)
    }
}

impl HasCursor for Vec<u8> {
    type Cursor<'a>
        = &'a [u8]
    where
        Self: 'a;

    /// Returns the part of the vector's contents that starts at `offset`.
    ///
    /// An `offset` past the end of the vector (or one that does not fit in
    /// `usize`) yields an empty cursor instead of panicking, so reading from
    /// it simply returns zero bytes.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        slice_from_offset(self, offset)
    }
}

/// Returns `bytes[offset..]`, or an empty slice when `offset` is out of
/// range.
///
/// Offsets can come straight from archive metadata, so they must never be
/// able to trigger an indexing panic.
fn slice_from_offset(bytes: &[u8], offset: u64) -> &[u8] {
    usize::try_from(offset)
        .ok()
        .and_then(|start| bytes.get(start..))
        .unwrap_or(&[])
}
237
#[cfg(feature = "file")]
impl HasCursor for std::fs::File {
    type Cursor<'a>
        = positioned_io::Cursor<&'a std::fs::File>
    where
        Self: 'a;

    /// Returns a `positioned_io` cursor over the file, starting at `offset`.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        let file: &std::fs::File = self;
        positioned_io::Cursor::new_pos(file, offset)
    }
}
249
#[cfg(feature = "file")]
impl ReadZip for std::fs::File {
    type File = Self;

    /// Reads the file as a zip archive, taking the archive size from the
    /// file's metadata.
    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self>, Error> {
        let metadata = self.metadata()?;
        self.read_zip_with_size(metadata.len())
    }
}
259
260/// Allows reading zip entries in a streaming fashion, without seeking,
261/// based only on local headers. THIS IS NOT RECOMMENDED, as correctly
262/// reading zip files requires reading the central directory (located at
263/// the end of the file).
264pub trait ReadZipStreaming<R>
265where
266    R: Read,
267{
268    /// Get the first zip entry from the stream as a [StreamingEntryReader].
269    ///
270    /// See the trait's documentation for why using this is
271    /// generally a bad idea: you might want to use [ReadZip] or
272    /// [ReadZipWithSize] instead.
273    fn stream_zip_entries_throwing_caution_to_the_wind(
274        self,
275    ) -> Result<StreamingEntryReader<R>, Error>;
276}
277
278impl<R> ReadZipStreaming<R> for R
279where
280    R: Read,
281{
282    fn stream_zip_entries_throwing_caution_to_the_wind(
283        mut self,
284    ) -> Result<StreamingEntryReader<Self>, Error> {
285        let mut fsm = EntryFsm::new(None, None);
286
287        loop {
288            if fsm.wants_read() {
289                let n = self.read(fsm.space())?;
290                trace!("read {} bytes into buf for first zip entry", n);
291                fsm.fill(n);
292            }
293
294            if let Some(entry) = fsm.process_till_header()? {
295                let entry = entry.clone();
296                return Ok(StreamingEntryReader::new(fsm, entry, self));
297            }
298        }
299    }
300}