use super::{Metadata, ReadSeek};
use crate::{
types::{self, Pod},
utils::Counter,
};
use std::io;
/// Builds the error returned when the input cannot be recognised as a zip
/// archive (for example when no end-of-central-directory record is found).
#[inline]
fn not_a_zip() -> io::Error {
    const MESSAGE: &str = "not a zip archive";
    super::invalid(MESSAGE)
}
/// Builds the error returned when a single archive entry (its headers or data
/// descriptor) fails validation.
#[inline]
fn invalid_entry() -> io::Error {
    const MESSAGE: &str = "invalid entry";
    super::invalid(MESSAGE)
}
/// Builds the error returned when archive-level structures (end-of-central-
/// directory records, directory bounds) are inconsistent.
fn invalid_zip() -> io::Error {
    const MESSAGE: &str = "invalid zip archive";
    super::invalid(MESSAGE)
}
/// Builds the `Unsupported` error returned for archives that declare more
/// than one disk.
#[cold]
fn multi_disk() -> io::Error {
    const MESSAGE: &str = "multi-disk archives are not supported";
    let kind = io::ErrorKind::Unsupported;
    io::Error::new(kind, MESSAGE)
}
/// Lenient equality for optional values.
///
/// Two values are only compared when both are present; if either side is
/// `None` the pair is treated as matching.
#[inline]
fn opt_eq<T: Eq>(a: &Option<T>, b: &Option<T>) -> bool {
    a.as_ref()
        .zip(b.as_ref())
        .map_or(true, |(lhs, rhs)| lhs == rhs)
}
/// Reading helpers layered on top of [`io::Read`].
trait ReadExt: io::Read {
    /// Reads `N` consecutive variable-length fields in one `read_exact` call.
    ///
    /// `buf` is resized to the sum of `sizes` and filled from the reader; the
    /// returned array holds one sub-slice of `buf` per requested size, in the
    /// same order as `sizes`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `read_exact`.
    fn read_variable_fields<'a, const N: usize>(
        &mut self,
        sizes: [usize; N],
        buf: &'a mut Vec<u8>,
    ) -> io::Result<[&'a [u8]; N]> {
        let total_len: usize = sizes.iter().sum();
        buf.resize(total_len, 0);
        self.read_exact(buf.as_mut_slice())?;

        // Carve the filled buffer into consecutive fields, front to back.
        let empty: &'a [u8] = &[];
        let mut fields = [empty; N];
        let mut remaining: &'a [u8] = buf.as_slice();
        for (slot, len) in fields.iter_mut().zip(sizes) {
            let (field, rest) = remaining.split_at(len);
            *slot = field;
            remaining = rest;
        }
        Ok(fields)
    }

    /// Reads one fixed-size plain-old-data value by filling a zeroed `T`
    /// through its mutable byte view.
    ///
    /// # Errors
    ///
    /// Propagates any error from `read_exact`.
    fn read_pod<T: Pod>(&mut self) -> io::Result<T> {
        let mut value = T::zeroed();
        self.read_exact(value.as_bytes_mut())?;
        Ok(value)
    }
}
// Blanket implementation: every `io::Read` type (including unsized ones such
// as trait objects) gets the `ReadExt` helpers.
impl<R: io::Read + ?Sized> ReadExt for R {}
/// Location and extent of the central directory, as declared by the
/// end-of-central-directory record (classic or zip64).
pub struct CentralDirectory {
/// Declared offset of the first central directory header.
offset: u64,
/// Declared size of the central directory in bytes.
size: u64,
/// Offset of the end-of-central-directory record the values were read from
/// (the zip64 record when the archive uses one).
eocd_offset: u64,
/// Declared total number of entries.
entries: u64,
}
impl CentralDirectory {
    /// Checks that the declared directory size is plausible.
    ///
    /// The size must be at least one central file header plus one byte per
    /// entry, and must exactly span the gap between the directory offset and
    /// the end-of-central-directory record. Returns `None` when either
    /// condition fails or when the arithmetic overflows/underflows.
    fn validate_size(&self) -> Option<()> {
        let per_entry_min = size_of::<types::CentralFileHeader>() as u64 + 1;
        let min_size = per_entry_min.checked_mul(self.entries)?;
        let expected_size = self.eocd_offset.checked_sub(self.offset)?;
        (self.size >= min_size && self.size == expected_size).then_some(())
    }
}
/// Result of locating the end-of-central-directory record: its absolute
/// offset, the parsed record, and the bytes that followed it in the searched
/// buffer (the archive comment).
type EocdData = (u64, types::EndOfCentralDirectory, Box<[u8]>);
/// Searches `buffer` for the last occurrence of the end-of-central-directory
/// signature and parses the record starting there.
///
/// `buffer_offset` is the absolute position of `buffer[0]`; it is added to the
/// match index to produce the record's absolute offset. Everything in `buffer`
/// after the fixed-size record is returned as the trailing bytes.
///
/// Returns `Ok(None)` when the signature does not occur in `buffer`.
///
/// # Errors
///
/// Fails if the signature is found but fewer bytes than a full record remain
/// after it.
fn find_eocd_in_buffer(buffer_offset: u64, buffer: &[u8]) -> io::Result<Option<EocdData>> {
    let needle = types::EndOfCentralDirectory::SIGNATURE.as_bytes();
    let Some(index) = memchr::memmem::rfind(buffer, needle) else {
        return Ok(None);
    };

    // Reading from a `&[u8]` advances the slice, so `rest` ends up pointing
    // just past the fixed-size record.
    let mut rest = &buffer[index..];
    let record = rest.read_pod::<types::EndOfCentralDirectory>()?;
    let trailing = Box::<[u8]>::from(rest);
    Ok(Some((buffer_offset + index as u64, record, trailing)))
}
fn find_eocd(reader: &mut dyn ReadSeek) -> io::Result<EocdData> {
let size = reader.seek(io::SeekFrom::End(0))?;
if size < 22 {
return Err(not_a_zip());
}
let pos = reader.seek(io::SeekFrom::End(-22))?;
let record = reader.read_pod::<types::EndOfCentralDirectory>()?;
if let Some(eocd) = find_eocd_in_buffer(pos, record.as_bytes())? {
return Ok(eocd);
}
let read_size = std::cmp::min(size, 22 + u16::MAX as u64);
let pos = reader.seek(io::SeekFrom::Start(size - read_size))?;
let mut buffer = vec![0; read_size as usize];
reader.read_exact(&mut buffer)?;
if let Some(eocd) = find_eocd_in_buffer(pos, &buffer)? {
return Ok(eocd);
}
Err(not_a_zip())
}
/// Reads the zip64 end-of-central-directory structures.
///
/// `offset` is the position of the classic end-of-central-directory record.
/// The zip64 locator is expected immediately before it; the locator in turn
/// points at the zip64 end-of-central-directory record, whose end must
/// coincide with the start of the locator.
///
/// # Errors
///
/// * "invalid zip archive" when there is no room for a locator before
///   `offset`, the locator signature is wrong, the record offset lies past the
///   locator, the entry counts disagree, or the record's declared size does
///   not place its end exactly at the locator.
/// * The multi-disk error when any disk field is non-zero or more than one
///   disk is declared.
/// * Any I/O error from seeking or reading.
fn read_eocd64(reader: &mut dyn ReadSeek, offset: u64) -> io::Result<CentralDirectory> {
    // The record's size field does not count its own leading signature (4
    // bytes) and size field (8 bytes).
    const RECORD_PREFIX_LEN: u64 = 12;

    let locator_offset = offset
        .checked_sub(size_of::<types::EndOfCentralDirectory64Locator>() as u64)
        .ok_or_else(invalid_zip)?;
    reader.seek(io::SeekFrom::Start(locator_offset))?;
    let locator: types::EndOfCentralDirectory64Locator = reader.read_pod()?;
    let eocd_offset = locator.central_directory_64_offset.get();
    if locator.signature != types::EndOfCentralDirectory64Locator::SIGNATURE
        || eocd_offset > locator_offset
    {
        return Err(invalid_zip());
    }
    if locator.disk_with_central_directory.get() != 0 || locator.total_disks.get() > 1 {
        return Err(multi_disk());
    }

    reader.seek(io::SeekFrom::Start(eocd_offset))?;
    // NOTE(review): the record's own signature is not compared in this
    // function; confirm whether that is validated elsewhere.
    let end_dir: types::EndOfCentralDirectory64 = reader.read_pod()?;
    if end_dir.disk_with_central_directory.get() != 0 || end_dir.disk_number.get() != 0 {
        return Err(multi_disk());
    }

    // `record_size` comes straight from the file, so every step of the end
    // computation must be overflow-checked: an unchecked `+ 12` would panic in
    // debug builds and wrap in release builds, letting a bogus size pass.
    let record_end = end_dir
        .record_size
        .get()
        .checked_add(RECORD_PREFIX_LEN)
        .and_then(|total_len| eocd_offset.checked_add(total_len));
    if { end_dir.total_entries } != { end_dir.entries_on_this_disk }
        || record_end != Some(locator_offset)
    {
        return Err(invalid_zip());
    }

    Ok(CentralDirectory {
        offset: end_dir.central_directory_offset.get(),
        size: end_dir.central_directory_size.get(),
        eocd_offset,
        entries: end_dir.total_entries.get(),
    })
}
/// Interprets the classic end-of-central-directory record found at `offset`.
///
/// When any of the entry count, directory size or directory offset fields
/// holds its all-ones sentinel value, the real values live in the zip64
/// structures and the work is delegated to `read_eocd64`. Otherwise the
/// record's own values are widened and returned.
///
/// # Errors
///
/// * The multi-disk error when either disk field is non-zero.
/// * "invalid zip archive" when the per-disk and total entry counts differ.
/// * Whatever `read_eocd64` returns on the zip64 path.
fn read_eocd(
    reader: &mut dyn ReadSeek,
    offset: u64,
    dir_end: types::EndOfCentralDirectory,
) -> io::Result<CentralDirectory> {
    if dir_end.disk_number.get() != 0 || dir_end.disk_with_central_directory.get() != 0 {
        return Err(multi_disk());
    }
    if dir_end.total_entries != dir_end.entries_on_this_disk {
        return Err(invalid_zip());
    }

    // A saturated field means "look in the zip64 record". The size field is
    // included: a zip64 archive may saturate it even when the count and
    // offset still fit in the classic record.
    let needs_zip64 = dir_end.total_entries.get() == u16::MAX
        || dir_end.central_directory_size.get() == u32::MAX
        || dir_end.central_directory_offset.get() == u32::MAX;
    if needs_zip64 {
        return read_eocd64(reader, offset);
    }

    Ok(CentralDirectory {
        offset: dir_end.central_directory_offset.get() as _,
        size: dir_end.central_directory_size.get() as _,
        eocd_offset: offset,
        entries: dir_end.total_entries.get() as _,
    })
}
fn read_local_header(
reader: &mut Counter<&mut dyn ReadSeek>,
buf: &mut Vec<u8>,
) -> io::Result<Metadata> {
let header = reader.read_pod::<types::LocalFileHeader>()?;
let [file_name, extra_fields] = reader.read_variable_fields(
[
header.file_name_length.get() as _,
header.extra_fields_length.get() as _,
],
buf,
)?;
Metadata::from_local_header(header, file_name, extra_fields).ok_or_else(invalid_entry)
}
fn read_central_header(reader: &mut dyn ReadSeek, buf: &mut Vec<u8>) -> io::Result<Metadata> {
let header = reader.read_pod::<types::CentralFileHeader>()?;
let [file_name, extra_fields, comment] = reader.read_variable_fields(
[
header.file_name_length.get() as _,
header.extra_fields_length.get() as _,
header.file_comment_length.get() as _,
],
buf,
)?;
Metadata::from_central_header(header, file_name, extra_fields, comment)
.ok_or_else(invalid_entry)
}
/// Reads the data descriptor that follows an entry's data, if the entry is
/// marked as streaming, and stores its sizes and CRC into `meta`.
///
/// Entries not marked as streaming are left untouched and nothing is read.
/// The 64-bit descriptor layout is used when `meta.is_zip64` is set, the
/// 32-bit layout otherwise.
///
/// # Errors
///
/// Propagates I/O errors and returns "invalid entry" when the descriptor does
/// not start with the expected signature; `meta` is not modified in that case.
fn read_data_descriptor(
    reader: &mut Counter<&mut dyn ReadSeek>,
    meta: &mut Metadata,
) -> io::Result<()> {
    if !meta.is_streaming {
        return Ok(());
    }

    let (compressed_size, uncompressed_size, crc32) = if meta.is_zip64 {
        let wide: types::DataDescriptor64 = reader.read_pod()?;
        if wide.signature != types::DataDescriptor64::SIGNATURE {
            return Err(invalid_entry());
        }
        (
            wide.compressed_size.get(),
            wide.uncompressed_size.get(),
            wide.crc32.get(),
        )
    } else {
        let narrow: types::DataDescriptor32 = reader.read_pod()?;
        if narrow.signature != types::DataDescriptor32::SIGNATURE {
            return Err(invalid_entry());
        }
        (
            narrow.compressed_size.get() as _,
            narrow.uncompressed_size.get() as _,
            narrow.crc32.get(),
        )
    };

    meta.compressed_size = compressed_size;
    meta.uncompressed_size = uncompressed_size;
    meta.crc32 = crc32;
    Ok(())
}
/// Reads the local header (and data descriptor, if any) at the reader's
/// current position and cross-checks it against the central directory `entry`.
///
/// On success `entry.data_offset` holds the position just after the local
/// header, the reader has been advanced past the entry's data and descriptor,
/// and missing creation/access times on `entry` are filled from the local
/// header.
///
/// # Errors
///
/// Returns "invalid entry" when
/// * a directory entry declares a non-zero uncompressed size,
/// * the local and central records disagree on compression method, name,
///   sizes, CRC, flags or (when both are present) timestamps,
/// * a stored, unencrypted entry has differing compressed and uncompressed
///   sizes;
///
/// and propagates I/O errors.
fn check_local_entry(
    reader: &mut Counter<&mut dyn ReadSeek>,
    entry: &mut Metadata,
    buf: &mut Vec<u8>,
) -> io::Result<()> {
    // NOTE(review): the entry's recorded header offset is not compared with
    // the reader position here; confirm that relying on sequential layout is
    // intended.
    let mut local = read_local_header(reader, buf)?;
    entry.data_offset = reader.amt;

    if entry.file_type.is_directory() && entry.uncompressed_size != 0 {
        return Err(invalid_entry());
    }
    // Skip over the entry's data to reach the optional data descriptor.
    reader.advance(entry.compressed_size as _)?;
    read_data_descriptor(reader, &mut local)?;

    let mismatch = entry.compression_method != local.compression_method
        || entry.name != local.name
        || entry.compressed_size != local.compressed_size
        || entry.uncompressed_size != local.uncompressed_size
        || entry.crc32 != local.crc32
        || entry.flags != local.flags
        || !opt_eq(&entry.modification_time, &local.modification_time)
        || !opt_eq(&entry.access_time, &local.access_time)
        || !opt_eq(&entry.creation_time, &local.creation_time);
    if mismatch {
        return Err(invalid_entry());
    }

    // Stored data is copied verbatim, so unless encryption is in play the two
    // sizes must be identical.
    let stored = entry.compression_method == crate::CompressionMethod::STORE;
    if stored && entry.compressed_size != entry.uncompressed_size && entry.encryption.is_none() {
        return Err(invalid_entry());
    }

    // Prefer the central directory's timestamps; fall back to the local ones.
    entry.creation_time = entry.creation_time.take().or(local.creation_time);
    entry.access_time = entry.access_time.take().or(local.access_time);
    Ok(())
}
fn read_central_directory(
reader: &mut dyn ReadSeek,
offset: u64,
len: u64,
) -> io::Result<Vec<Metadata>> {
if len == 0 {
return Ok(Vec::new());
}
let len = len.try_into().map_err(|_| invalid_zip())?;
let mut entries = Vec::new();
entries.try_reserve_exact(len)?;
reader.seek(io::SeekFrom::Start(offset))?;
let mut buf = Vec::new();
for _ in 0..len {
entries.push(read_central_header(reader, &mut buf)?);
}
let first_offset = entries.first().unwrap().header_offset;
reader.seek(io::SeekFrom::Start(first_offset))?;
let mut reader = crate::utils::Counter {
inner: reader,
amt: first_offset,
};
for entry in &mut entries {
check_local_entry(&mut reader, entry, &mut buf)?;
}
if reader.amt != offset {
return Err(invalid_zip());
}
Ok(entries)
}
/// Parses the archive in `reader` and returns its entries together with the
/// archive comment.
///
/// The steps are: locate the end-of-central-directory record, check that the
/// declared comment length matches the trailing bytes, resolve the central
/// directory location (classic or zip64), validate its size, read and verify
/// all entries, and finally reject archives in which two entries share a name
/// (a single trailing `/` is ignored for this comparison).
///
/// # Errors
///
/// Returns an error when any of the steps above fails, including the
/// "duplicated name in archive" error, and propagates I/O errors.
pub(crate) fn read_archive(reader: &mut dyn ReadSeek) -> io::Result<(Vec<Metadata>, Box<[u8]>)> {
    let (eocd_offset, dir_end, comment) = find_eocd(reader)?;
    if comment.len() != dir_end.comment_length.get() as usize {
        return Err(invalid_zip());
    }

    let central_dir = read_eocd(reader, eocd_offset, dir_end)?;
    if central_dir.validate_size().is_none() {
        return Err(invalid_zip());
    }

    let entries = read_central_directory(reader, central_dir.offset, central_dir.entries)?;

    // `insert` returns false on the first repeated name, which stops `all`.
    let mut seen = std::collections::HashSet::with_capacity(entries.len());
    let all_unique = entries
        .iter()
        .all(|entry| seen.insert(entry.name.strip_suffix('/').unwrap_or(&entry.name)));
    if !all_unique {
        return Err(super::invalid("duplicated name in archive"));
    }

    Ok((entries, comment))
}