// sett/zip/reader.rs

1use std::io;
2
3use super::{ZipFile, error, spec};
4use crate::package::source::{Either, IntoAsyncRead, PackageStream};
5
6#[derive(Clone, Debug)]
7pub(crate) struct ZipReader<S> {
8    files: Vec<ZipFile>,
9    // Offset (in bytes) at which the ZIP archive starts in the original file
10    // (ZIP file may be prepended with arbitrary data).
11    zip_offset: u64,
12    pub(crate) source: S,
13}
14
15pub(crate) async fn local_header_size<R: tokio::io::AsyncBufRead + Unpin>(
16    reader: &mut R,
17) -> Result<u64, error::HeaderParseError> {
18    let local_header = spec::LocalFileHeader::parse(reader).await?;
19    Ok(local_header.header_size as u64)
20}
21
22impl<A> From<std::io::Error> for Either<A, std::io::Error> {
23    fn from(value: std::io::Error) -> Self {
24        Self::Or(value)
25    }
26}
27
28impl PackageStream for std::path::PathBuf {
29    type Error = std::io::Error;
30    type FileStream = tokio::io::Take<tokio::io::BufReader<tokio::fs::File>>;
31
32    async fn open_central_directory_stream(
33        &self,
34    ) -> Result<(impl std::io::Read + std::io::Seek, u64), Self::Error> {
35        Ok((io::BufReader::new(std::fs::File::open(self)?), 0))
36    }
37
38    async fn open_file_header_stream(
39        &self,
40        offset: u64,
41        total_size: u64,
42    ) -> Result<Self::FileStream, Either<crate::zip::error::HeaderParseError, Self::Error>> {
43        // Read the local file header to find where the file contents starts
44        let mut reader = tokio::io::BufReader::new(tokio::fs::File::open(self).await?);
45        use tokio::io::AsyncSeekExt as _;
46        reader.seek(io::SeekFrom::Start(offset)).await?;
47        let header_size = Self::local_header_size(&mut reader).await?;
48        let offset = offset + header_size;
49        reader.seek(io::SeekFrom::Start(offset)).await?;
50        use tokio::io::AsyncReadExt as _;
51        Ok(reader.take(total_size))
52    }
53
54    async fn open_raw(&self) -> Result<Self::FileStream, Self::Error> {
55        let reader = tokio::io::BufReader::new(tokio::fs::File::open(self).await?);
56        use tokio::io::AsyncReadExt as _;
57        Ok(reader.take(self.size().await?))
58    }
59
60    async fn size(&self) -> Result<u64, Self::Error> {
61        Ok(tokio::fs::File::open(self).await?.metadata().await?.len())
62    }
63
64    fn name(&self) -> String {
65        self.file_name()
66            .unwrap_or(self.as_os_str())
67            .to_string_lossy()
68            .to_string()
69    }
70}
71
72impl<S> ZipReader<S> {
73    /// Returns the list of files present in the Zip archive.
74    pub(crate) fn file_names(&self) -> impl Iterator<Item = &str> {
75        self.files.iter().map(|f| f.name.as_str())
76    }
77}
78
79impl<E> From<Either<error::HeaderParseError, E>> for error::ReadStreamError<E> {
80    fn from(value: Either<error::HeaderParseError, E>) -> Self {
81        match value {
82            Either::Either(value) => Self::Header(value),
83            Either::Or(value) => Self::Source(value),
84        }
85    }
86}
87
88impl<Src> ZipReader<Src>
89where
90    Src: PackageStream,
91{
92    pub(crate) async fn open(source: Src) -> Result<Self, error::OpenStreamError<Src::Error>> {
93        let (files, zip_offset) = {
94            let (mut reader, offset) = source.open_central_directory_stream().await?;
95            parse_zip_central_directory(&mut reader, offset).map_err(error::OpenStreamError::Zip)?
96        };
97        Ok(Self {
98            files,
99            zip_offset,
100            source,
101        })
102    }
103
104    /// Opens a file from the ZIP archive as data stream.
105    ///
106    /// Returns a tuple of a reader and a file size.
107    pub(crate) async fn open_file(
108        &self,
109        name: &str,
110    ) -> Result<(Src::FileStream, u64), error::ReadStreamError<Src::Error>> {
111        let file_info = self
112            .files
113            .iter()
114            .find(|f| f.name == name)
115            .ok_or_else(|| error::ReadStreamError::NotFound(name.to_string()))?;
116        if file_info.compression_method != 0 {
117            return Err(error::ReadStreamError::InvalidCompressionMethod);
118        }
119        let offset = file_info.offset + self.zip_offset;
120        let file_stream = self
121            .source
122            .open_file_header_stream(offset, file_info.size)
123            .await?;
124        Ok((file_stream, file_info.size))
125    }
126
127    pub(crate) async fn read_file(
128        &self,
129        name: &str,
130    ) -> Result<Vec<u8>, error::ReadStreamError<Src::Error>> {
131        let (reader, size) = self.open_file(name).await?;
132        let mut reader = IntoAsyncRead::into_async_reader(reader);
133        let mut buffer = Vec::with_capacity(size as usize);
134        use tokio::io::AsyncReadExt;
135        reader.read_to_end(&mut buffer).await?;
136        Ok(buffer)
137    }
138}
139
140/// Finds and parses ZIP central directory
141///
142/// The provided `reader` doesn't need to contain the entire ZIP archive.
143/// It can be limited only to the end part of the archive (containing all
144/// ZIP data structures that follow the last data file). In such a case
145/// `buffer_offset` must specify reader's position relative to the beginning
146/// of the ZIP file.
147fn parse_zip_central_directory<R: io::Read + io::Seek>(
148    reader: &mut R,
149    buffer_offset: u64,
150) -> Result<(Vec<ZipFile>, u64), error::ZipError> {
151    let with_buffer_offset = |offset: u64| {
152        offset
153            .checked_sub(buffer_offset)
154            .ok_or(error::ZipError::InvalidOffset)
155    };
156
157    spec::CentralDirectoryEnd::find(reader)?;
158    let cde = spec::CentralDirectoryEnd::parse(reader)?;
159
160    // If present the Zip64 central directory end locator is at 20 bytes
161    // before the central directory end.
162    reader.seek(io::SeekFrom::End(
163        -((cde.size() + spec::Zip64CentralDirectoryEndLocator::SIZE) as i64),
164    ))?;
165    let (number_of_records, central_directory_offset, zip_offset) = if let Some(zip64_end) =
166        spec::Zip64CentralDirectoryEndLocator::parse(reader)?
167    {
168        reader.seek(io::SeekFrom::Start(with_buffer_offset(
169            zip64_end.zip64_central_directory_end_offset,
170        )?))?;
171        let zip64cde_actual_position =
172            spec::Zip64CentralDirectoryEnd::find(reader)? + buffer_offset;
173        let zip64cde = spec::Zip64CentralDirectoryEnd::parse(reader)?;
174        if zip64cde.disk_number_of_records != zip64cde.total_number_of_records {
175            return Err(error::ZipError::UnimplementedMultiDisk);
176        }
177        (
178            zip64cde.total_number_of_records,
179            zip64cde.central_directory_offset,
180            zip64cde_actual_position - zip64_end.zip64_central_directory_end_offset,
181        )
182    } else {
183        reader.seek(io::SeekFrom::Start(with_buffer_offset(
184            cde.central_directory_offset as u64,
185        )?))?;
186        let central_directory_actual_offset = spec::CentralDirectoryHeader::find(reader)?;
187        (
188            cde.total_number_of_records as u64,
189            cde.central_directory_offset as u64,
190            central_directory_actual_offset + buffer_offset - (cde.central_directory_offset as u64),
191        )
192    };
193    reader.seek(io::SeekFrom::Start(with_buffer_offset(
194        zip_offset + central_directory_offset,
195    )?))?;
196    let mut files = Vec::with_capacity(number_of_records as usize);
197    for _ in 0..number_of_records {
198        let header = spec::CentralDirectoryHeader::parse(reader)?;
199        files.push(ZipFile {
200            name: header.name.into_owned(),
201            offset: header.offset,
202            size: header.size,
203            hasher: crc32fast::Hasher::new(),
204            crc32: header.crc32,
205            flags: header.flags,
206            external_attributes: header.external_attributes,
207            timestamp: header.modified,
208            compression_method: header.compression_method,
209        });
210    }
211    Ok((files, zip_offset))
212}