rc_zip_sync/
read_zip.rs

1use rc_zip::{
2    error::Error,
3    fsm::{ArchiveFsm, FsmResult},
4    parse::Archive,
5};
6use rc_zip::{fsm::EntryFsm, parse::Entry};
7use tracing::trace;
8
9use crate::entry_reader::EntryReader;
10use crate::streaming_entry_reader::StreamingEntryReader;
11use std::{io::Read, ops::Deref};
12
13/// A trait for reading something as a zip archive
14///
15/// See also [ReadZip].
16pub trait ReadZipWithSize {
17    /// The type of the file to read from.
18    type File: HasCursor;
19
20    /// Reads self as a zip archive.
21    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, Self::File>, Error>;
22}
23
24/// A trait for reading something as a zip archive when we can tell size from
25/// self.
26///
27/// See also [ReadZipWithSize].
28pub trait ReadZip {
29    /// The type of the file to read from.
30    type File: HasCursor;
31
32    /// Reads self as a zip archive.
33    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error>;
34}
35
36impl<F> ReadZipWithSize for F
37where
38    F: HasCursor,
39{
40    type File = F;
41
42    fn read_zip_with_size(&self, size: u64) -> Result<ArchiveHandle<'_, F>, Error> {
43        struct CursorState<'a, F: HasCursor + 'a> {
44            cursor: <F as HasCursor>::Cursor<'a>,
45            offset: u64,
46        }
47        let mut cstate: Option<CursorState<'_, F>> = None;
48
49        let mut fsm = ArchiveFsm::new(size);
50        loop {
51            if let Some(offset) = fsm.wants_read() {
52                trace!(%offset, "read_zip_with_size: wants_read, space len = {}", fsm.space().len());
53
54                let mut cstate_next = match cstate.take() {
55                    Some(cstate) => {
56                        if cstate.offset == offset {
57                            // all good, re-using
58                            cstate
59                        } else {
60                            CursorState {
61                                cursor: self.cursor_at(offset),
62                                offset,
63                            }
64                        }
65                    }
66                    None => CursorState {
67                        cursor: self.cursor_at(offset),
68                        offset,
69                    },
70                };
71
72                match cstate_next.cursor.read(fsm.space()) {
73                    Ok(read_bytes) => {
74                        cstate_next.offset += read_bytes as u64;
75                        cstate = Some(cstate_next);
76
77                        trace!(%read_bytes, "read_zip_with_size: read");
78                        if read_bytes == 0 {
79                            return Err(Error::IO(std::io::ErrorKind::UnexpectedEof.into()));
80                        }
81                        fsm.fill(read_bytes);
82                    }
83                    Err(err) => return Err(Error::IO(err)),
84                }
85            }
86
87            fsm = match fsm.process()? {
88                FsmResult::Done(archive) => {
89                    trace!("read_zip_with_size: done");
90                    return Ok(ArchiveHandle {
91                        file: self,
92                        archive,
93                    });
94                }
95                FsmResult::Continue(fsm) => fsm,
96            }
97        }
98    }
99}
100
101impl ReadZip for &[u8] {
102    type File = Self;
103
104    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
105        self.read_zip_with_size(self.len() as u64)
106    }
107}
108
109impl ReadZip for Vec<u8> {
110    type File = Self;
111
112    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self::File>, Error> {
113        self.read_zip_with_size(self.len() as u64)
114    }
115}
116
117/// A zip archive, read synchronously from a file or other I/O resource.
118///
119/// This only contains metadata for the archive and its entries. Separate
120/// readers can be created for arbitraries entries on-demand using
121/// [EntryHandle::reader].
122pub struct ArchiveHandle<'a, F>
123where
124    F: HasCursor,
125{
126    file: &'a F,
127    archive: Archive,
128}
129
130impl<F> Deref for ArchiveHandle<'_, F>
131where
132    F: HasCursor,
133{
134    type Target = Archive;
135
136    fn deref(&self) -> &Self::Target {
137        &self.archive
138    }
139}
140
141impl<F> ArchiveHandle<'_, F>
142where
143    F: HasCursor,
144{
145    /// Iterate over all files in this zip, read from the central directory.
146    pub fn entries(&self) -> impl Iterator<Item = EntryHandle<'_, F>> {
147        self.archive.entries().map(move |entry| EntryHandle {
148            file: self.file,
149            entry,
150        })
151    }
152
153    /// Attempts to look up an entry by name. This is usually a bad idea,
154    /// as names aren't necessarily normalized in zip archives.
155    pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<EntryHandle<'_, F>> {
156        self.archive
157            .entries()
158            .find(|&x| x.name == name.as_ref())
159            .map(|entry| EntryHandle {
160                file: self.file,
161                entry,
162            })
163    }
164}
165
166/// A zip entry, read synchronously from a file or other I/O resource.
167pub struct EntryHandle<'a, F> {
168    file: &'a F,
169    entry: &'a Entry,
170}
171
172impl<F> Deref for EntryHandle<'_, F> {
173    type Target = Entry;
174
175    fn deref(&self) -> &Self::Target {
176        self.entry
177    }
178}
179
180impl<'a, F> EntryHandle<'a, F>
181where
182    F: HasCursor,
183{
184    /// Returns a reader for the entry.
185    pub fn reader(&self) -> EntryReader<<F as HasCursor>::Cursor<'a>> {
186        EntryReader::new(self.entry, self.file.cursor_at(self.entry.header_offset))
187    }
188
189    /// Reads the entire entry into a vector.
190    pub fn bytes(&self) -> std::io::Result<Vec<u8>> {
191        let mut v = Vec::new();
192        self.reader().read_to_end(&mut v)?;
193        Ok(v)
194    }
195}
196
/// A sliceable I/O resource: we can ask for a [Read] at a given offset.
pub trait HasCursor {
    /// The type of [Read] returned by [HasCursor::cursor_at].
    type Cursor<'a>: Read + 'a
    where
        Self: 'a;

    /// Returns a [Read] at the given offset.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_>;
}

impl HasCursor for &[u8] {
    type Cursor<'a> = &'a [u8]
    where
        Self: 'a;

    /// Returns the tail of the slice starting at `offset`.
    ///
    /// An offset past the end of the slice (or one that does not fit in
    /// `usize`) yields an empty cursor instead of panicking, so that reads
    /// from it simply report end-of-file. Offsets can come from archive
    /// metadata, which must not be trusted to be in bounds.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        usize::try_from(offset)
            .ok()
            .and_then(|start| self.get(start..))
            .unwrap_or(&[])
    }
}

impl HasCursor for Vec<u8> {
    type Cursor<'a> = &'a [u8]
    where
        Self: 'a;

    /// Returns the tail of the vector's contents starting at `offset`.
    ///
    /// An offset past the end of the vector (or one that does not fit in
    /// `usize`) yields an empty cursor instead of panicking, so that reads
    /// from it simply report end-of-file. Offsets can come from archive
    /// metadata, which must not be trusted to be in bounds.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        usize::try_from(offset)
            .ok()
            .and_then(|start| self.get(start..))
            .unwrap_or(&[])
    }
}
227
#[cfg(feature = "file")]
impl HasCursor for std::fs::File {
    type Cursor<'a> = positioned_io::Cursor<&'a std::fs::File>
    where
        Self: 'a;

    /// Returns a `positioned_io` cursor over this file, starting at `offset`.
    fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> {
        let file: &std::fs::File = self;
        positioned_io::Cursor::new_pos(file, offset)
    }
}
238
#[cfg(feature = "file")]
impl ReadZip for std::fs::File {
    type File = Self;

    /// Reads the file as a zip archive, taking the archive size from the
    /// file's metadata.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be queried or if reading the archive
    /// fails.
    fn read_zip(&self) -> Result<ArchiveHandle<'_, Self>, Error> {
        let metadata = self.metadata()?;
        self.read_zip_with_size(metadata.len())
    }
}
248
249/// Allows reading zip entries in a streaming fashion, without seeking,
250/// based only on local headers. THIS IS NOT RECOMMENDED, as correctly
251/// reading zip files requires reading the central directory (located at
252/// the end of the file).
253pub trait ReadZipStreaming<R>
254where
255    R: Read,
256{
257    /// Get the first zip entry from the stream as a [StreamingEntryReader].
258    ///
259    /// See the trait's documentation for why using this is
260    /// generally a bad idea: you might want to use [ReadZip] or
261    /// [ReadZipWithSize] instead.
262    fn stream_zip_entries_throwing_caution_to_the_wind(
263        self,
264    ) -> Result<StreamingEntryReader<R>, Error>;
265}
266
267impl<R> ReadZipStreaming<R> for R
268where
269    R: Read,
270{
271    fn stream_zip_entries_throwing_caution_to_the_wind(
272        mut self,
273    ) -> Result<StreamingEntryReader<Self>, Error> {
274        let mut fsm = EntryFsm::new(None, None);
275
276        loop {
277            if fsm.wants_read() {
278                let n = self.read(fsm.space())?;
279                trace!("read {} bytes into buf for first zip entry", n);
280                fsm.fill(n);
281            }
282
283            if let Some(entry) = fsm.process_till_header()? {
284                let entry = entry.clone();
285                return Ok(StreamingEntryReader::new(fsm, entry, self));
286            }
287        }
288    }
289}