rc_zip_sync/
read_zip.rs

1use rc_zip::{
2    error::Error,
3    fsm::{ArchiveFsm, FsmResult},
4    parse::Archive,
5};
6use rc_zip::{fsm::EntryFsm, parse::Entry};
7use tracing::trace;
8
9use crate::entry_reader::EntryReader;
10use crate::streaming_entry_reader::StreamingEntryReader;
11use std::{io::Read, ops::Deref};
12
13/// A trait for reading something as a zip archive
14///
15/// See also [ReadZip].
16pub trait ReadZipWithSize {
17    /// The type of the file to read from.
18    type File: HasCursor;
19
20    /// Reads self as a zip archive.
21    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, Self::File>, Error>;
22}
23
24/// A trait for reading something as a zip archive when we can tell size from
25/// self.
26///
27/// See also [ReadZipWithSize].
28pub trait ReadZip {
29    /// The type of the file to read from.
30    type File: HasCursor;
31
32    /// Reads self as a zip archive.
33    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error>;
34}
35
36impl<F> ReadZipWithSize for F
37where
38    F: HasCursor,
39{
40    type File = F;
41
42    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, F>, Error> {
43        struct CursorState<'a, F: HasCursor + 'a> {
44            cursor: <F as HasCursor>::Cursor<'a>,
45            offset: u64,
46        }
47        let mut cstate: Option<CursorState<'_, F>> = None;
48
49        let mut fsm = ArchiveFsm::new(size);
50        loop {
51            if let Some(offset) = fsm.wants_read() {
52                trace!(%offset, "read_zip_with_size: wants_read, space len = {}", fsm.space().len());
53
54                let mut cstate_next = match cstate.take() {
55                    Some(cstate) => {
56                        if cstate.offset == offset {
57                            // all good, re-using
58                            cstate
59                        } else {
60                            CursorState {
61                                cursor: self.cursor_at(offset),
62                                offset,
63                            }
64                        }
65                    }
66                    None => CursorState {
67                        cursor: self.cursor_at(offset),
68                        offset,
69                    },
70                };
71
72                match cstate_next.cursor.read(fsm.space()) {
73                    Ok(read_bytes) => {
74                        cstate_next.offset += read_bytes as u64;
75                        cstate = Some(cstate_next);
76
77                        trace!(%read_bytes, "read_zip_with_size: read");
78                        if read_bytes == 0 {
79                            return Err(Error::IO(std::io::ErrorKind::UnexpectedEof.into()));
80                        }
81                        fsm.fill(read_bytes);
82                    }
83                    Err(err) => return Err(Error::IO(err)),
84                }
85            }
86
87            fsm = match fsm.process()? {
88                FsmResult::Done(archive) => {
89                    trace!("read_zip_with_size: done");
90                    return Ok(ArchiveHandle {
91                        file: self,
92                        archive,
93                    });
94                }
95                FsmResult::Continue(fsm) => fsm,
96            }
97        }
98    }
99}
100
101impl ReadZip for &[u8] {
102    type File = Self;
103
104    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
105        self.read_zip_with_size(self.len() as u64)
106    }
107}
108
109impl ReadZip for Vec<u8> {
110    type File = Self;
111
112    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
113        self.read_zip_with_size(self.len() as u64)
114    }
115}
116
117/// A zip archive, read synchronously from a file or other I/O resource.
118///
119/// This only contains metadata for the archive and its entries. Separate
120/// readers can be created for arbitraries entries on-demand using
121/// [EntryHandle::reader].
122pub struct ArchiveHandle<'a, F>
123where
124    F: HasCursor,
125{
126    file: &'a F,
127    archive: Archive,
128}
129
130impl<F> Deref for ArchiveHandle<'_, F>
131where
132    F: HasCursor,
133{
134    type Target = Archive;
135
136    fn deref(&self) -> &Self::Target {
137        &self.archive
138    }
139}
140
141impl<F> ArchiveHandle<'_, F>
142where
143    F: HasCursor,
144{
145    /// Iterate over all files in this zip, read from the central directory.
146    pub fn entries(&self) -> impl Iterator<Item = EntryHandle<'_, F>> {
147        self.archive.entries().map(move |entry| EntryHandle {
148            file: self.file,
149            entry,
150        })
151    }
152
153    /// Attempts to look up an entry by name. This is usually a bad idea,
154    /// as names aren't necessarily normalized in zip archives.
155    pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<EntryHandle<'_, F>> {
156        self.archive
157            .entries()
158            .find(|&x| x.name == name.as_ref())
159            .map(|entry| EntryHandle {
160                file: self.file,
161                entry,
162            })
163    }
164}
165
166/// A zip entry, read synchronously from a file or other I/O resource.
167pub struct EntryHandle<'a, F> {
168    file: &'a F,
169    entry: &'a Entry,
170}
171
172impl<F> Deref for EntryHandle<'_, F> {
173    type Target = Entry;
174
175    fn deref(&self) -> &Self::Target {
176        self.entry
177    }
178}
179
180impl<'a, F> EntryHandle<'a, F>
181where
182    F: HasCursor,
183{
184    /// Returns a reader for the entry.
185    pub fn reader(&self) -> EntryReader<<F as HasCursor>::Cursor<'a>> {
186        EntryReader::new(self.entry, self.file.cursor_at(self.entry.header_offset))
187    }
188
189    /// Reads the entire entry into a vector.
190    pub fn bytes(&self) -> std::io::Result<Vec<u8>> {
191        let mut v = Vec::new();
192        self.reader().read_to_end(&mut v)?;
193        Ok(v)
194    }
195}
196
/// A sliceable I/O resource: we can ask for a [Read] at a given offset.
pub trait HasCursor {
    /// The type of [Read] returned by [HasCursor::cursor_at].
    type Cursor<'a>: Read + 'a
    where
        Self: 'a;

    /// Returns a [Read] at the given offset.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_>;
}

/// Returns the part of `bytes` that starts at `offset`.
///
/// An offset past the end of `bytes`, or one too large for `usize`, yields an
/// empty slice instead of panicking, so reading from it reports end of input.
fn tail_at(bytes: &[u8], offset: u64) -> &[u8] {
    let len = bytes.len();
    // Offsets can come from untrusted archive metadata, so clamp them.
    let start = usize::try_from(offset).map_or(len, |start| start.min(len));
    &bytes[start..]
}

impl HasCursor for &[u8] {
    type Cursor<'a>
        = &'a [u8]
    where
        Self: 'a;

    /// Returns the bytes from `offset` onwards; the result is empty when
    /// `offset` lies at or beyond the end of the slice.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        tail_at(self, offset)
    }
}

impl HasCursor for Vec<u8> {
    type Cursor<'a>
        = &'a [u8]
    where
        Self: 'a;

    /// Returns the bytes from `offset` onwards; the result is empty when
    /// `offset` lies at or beyond the end of the vector.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        tail_at(self, offset)
    }
}
229
#[cfg(feature = "file")]
impl HasCursor for std::fs::File {
    type Cursor<'a> = positioned_io::Cursor<&'a std::fs::File>
    where
        Self: 'a;

    /// Wraps a shared reference to the file in a `positioned_io` cursor whose
    /// starting position is `offset`.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        positioned_io::Cursor::new_pos(self, offset)
    }
}
241
#[cfg(feature = "file")]
impl ReadZip for std::fs::File {
    type File = Self;

    /// Reads this file as a zip archive, taking the archive size from the
    /// file's metadata.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be queried, or if reading the archive
    /// fails.
    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self>, Error> {
        let metadata = self.metadata()?;
        self.read_zip_with_size(metadata.len())
    }
}
251
252/// Allows reading zip entries in a streaming fashion, without seeking,
253/// based only on local headers. THIS IS NOT RECOMMENDED, as correctly
254/// reading zip files requires reading the central directory (located at
255/// the end of the file).
256pub trait ReadZipStreaming<R>
257where
258    R: Read,
259{
260    /// Get the first zip entry from the stream as a [StreamingEntryReader].
261    ///
262    /// See the trait's documentation for why using this is
263    /// generally a bad idea: you might want to use [ReadZip] or
264    /// [ReadZipWithSize] instead.
265    fn stream_zip_entries_throwing_caution_to_the_wind(
266        self,
267    ) -> Result<StreamingEntryReader<R>, Error>;
268}
269
270impl<R> ReadZipStreaming<R> for R
271where
272    R: Read,
273{
274    fn stream_zip_entries_throwing_caution_to_the_wind(
275        mut self,
276    ) -> Result<StreamingEntryReader<Self>, Error> {
277        let mut fsm = EntryFsm::new(None, None);
278
279        loop {
280            if fsm.wants_read() {
281                let n = self.read(fsm.space())?;
282                trace!("read {} bytes into buf for first zip entry", n);
283                fsm.fill(n);
284            }
285
286            if let Some(entry) = fsm.process_till_header()? {
287                let entry = entry.clone();
288                return Ok(StreamingEntryReader::new(fsm, entry, self));
289            }
290        }
291    }
292}