use std::cmp::min;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::fmt::Result as FmtResult;
use std::mem::size_of;
use std::path::Path;
use crate::mmap::Mmap;
use crate::util::bytes_to_path;
use crate::util::Pod;
use crate::util::ReadRaw as _;
use crate::Error;
use crate::IntoError as _;
use crate::Result;
/// Magic number expected at the start of every central directory file
/// header (`CdFileHeader::magic`).
const CD_FILE_HEADER_MAGIC: u32 = 0x02014b50;
/// Magic number expected at the start of the end of central directory
/// record (`EndOfCdRecord::magic`).
const END_OF_CD_RECORD_MAGIC: u32 = 0x06054b50;
/// Magic number expected at the start of every local file header
/// (`LocalFileHeader::magic`).
const LOCAL_FILE_HEADER_MAGIC: u32 = 0x04034b50;
/// Bit in a header's `flags` field marking the entry as encrypted. Entries
/// with this bit set are rejected as unsupported.
const FLAG_ENCRYPTED: u16 = 1 << 0;
/// Bit in a header's `flags` field marking the entry as being followed by a
/// data descriptor. Entries with this bit set are rejected as unsupported.
const FLAG_HAS_DATA_DESCRIPTOR: u16 = 1 << 3;
/// Raw layout of the "end of central directory" record of a ZIP archive.
///
/// The struct is `repr(C, packed)` and made up exclusively of integer
/// fields, so it contains no padding and can be read directly from the
/// archive's bytes.
#[derive(Clone, Debug)]
#[repr(C, packed)]
struct EndOfCdRecord {
/// Must equal `END_OF_CD_RECORD_MAGIC` for the record to be recognized.
magic: u32,
/// Number of this disk; only `0` (single-disk archives) is supported.
this_disk: u16,
/// Disk on which the central directory starts; only `0` is supported.
cd_disk: u16,
/// Number of central directory records.
cd_records: u16,
/// Total number of central directory records; has to match `cd_records`
/// for the archive to be supported.
cd_records_total: u16,
/// Size of the central directory, presumably in bytes as per the ZIP
/// format; not consulted by the code in this file.
cd_size: u32,
/// Offset of the central directory from the start of the archive data.
cd_offset: u32,
/// Number of comment bytes expected to follow this record.
comment_length: u16,
}
// SAFETY: NOTE(review): `Pod` is declared elsewhere; presumably it requires
//         that any byte pattern is a valid value. The struct is packed and
//         consists only of integers -- confirm against the trait's contract.
unsafe impl Pod for EndOfCdRecord {}
/// Raw layout of a central directory file header of a ZIP archive.
///
/// Each header is followed by a file name, an extra field, and a file
/// comment, whose lengths are given by the respective `*_length` fields.
/// The struct is `repr(C, packed)` and made up exclusively of integer
/// fields, so it contains no padding.
#[derive(Clone, Debug)]
#[repr(C, packed)]
struct CdFileHeader {
/// Must equal `CD_FILE_HEADER_MAGIC`.
magic: u32,
// The fields up to and including `uncompressed_size` are not evaluated by
// the code in this file; they are required to get the layout right.
version: u16,
min_version: u16,
flags: u16,
compression: u16,
last_modified_time: u16,
last_modified_date: u16,
crc: u32,
compressed_size: u32,
uncompressed_size: u32,
/// Length of the file name following the header, in bytes.
file_name_length: u16,
/// Length of the extra field following the file name, in bytes.
extra_field_length: u16,
/// Length of the file comment following the extra field, in bytes.
file_comment_length: u16,
disk: u16,
internal_attributes: u16,
external_attributes: u32,
/// Offset of the entry's local file header from the start of the archive
/// data.
offset: u32,
}
// SAFETY: NOTE(review): `Pod` is declared elsewhere; presumably it requires
//         that any byte pattern is a valid value. The struct is packed and
//         consists only of integers -- confirm against the trait's contract.
unsafe impl Pod for CdFileHeader {}
/// Raw layout of a local file header of a ZIP archive.
///
/// The header is followed by the file name, an extra field, and then the
/// entry's data. The struct is `repr(C, packed)` and made up exclusively of
/// integer fields, so it contains no padding.
#[derive(Clone, Debug)]
#[repr(C, packed)]
struct LocalFileHeader {
/// Must equal `LOCAL_FILE_HEADER_MAGIC`.
magic: u32,
min_version: u16,
/// Bit flags; see `FLAG_ENCRYPTED` and `FLAG_HAS_DATA_DESCRIPTOR`.
flags: u16,
/// Compression method identifier; passed through to `Entry::compression`
/// without interpretation.
compression: u16,
last_modified_time: u16,
last_modified_date: u16,
crc: u32,
/// Number of bytes of entry data following the header, file name, and
/// extra field.
compressed_size: u32,
uncompressed_size: u32,
/// Length of the file name following the header, in bytes.
file_name_length: u16,
/// Length of the extra field following the file name, in bytes.
extra_field_length: u16,
}
// SAFETY: NOTE(review): `Pod` is declared elsewhere; presumably it requires
//         that any byte pattern is a valid value. The struct is packed and
//         consists only of integers -- confirm against the trait's contract.
unsafe impl Pod for LocalFileHeader {}
/// A single file entry inside a ZIP archive.
#[doc(hidden)]
pub struct Entry<'archive> {
    /// The compression method identifier as stored in the entry's local
    /// file header.
    pub compression: u16,
    /// The path of the entry within the archive.
    pub path: &'archive Path,
    /// The offset of the entry's data from the start of the archive.
    pub data_offset: u64,
    /// The entry's (potentially compressed) data.
    pub data: &'archive [u8],
}

impl Debug for Entry<'_> {
    /// Format the entry, showing at most the first 32 bytes of its data.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        // Destructure exhaustively, so that adding a member to `Entry`
        // without adjusting this implementation is a compile error.
        let Self {
            compression: method,
            path: name,
            data_offset: offset,
            data: bytes,
        } = self;

        // Entries can be arbitrarily large; only print a short prefix.
        let preview = bytes.get(..min(bytes.len(), 32));

        let mut repr = f.debug_struct("Entry");
        repr.field("compression", method);
        repr.field("path", name);
        repr.field("data_offset", offset);
        repr.field("data", &preview);
        repr.finish()
    }
}
/// An iterator over the entries of an archive, driven by the records of the
/// archive's central directory.
pub struct EntryIter<'archive> {
/// The data of the entire archive; local file header offsets index into
/// it.
archive_data: &'archive [u8],
/// The central directory data that has not been parsed yet. It is advanced
/// past each record as iteration progresses.
cd_record_data: &'archive [u8],
/// The number of central directory records still to be yielded.
remaining_records: u16,
}
impl<'archive> EntryIter<'archive> {
fn parse_entry_at_offset(data: &[u8], offset: u32) -> Result<Entry<'_>> {
fn entry_impl(data: &[u8], offset: u32) -> Option<Result<Entry<'_>>> {
let mut data = data.get(offset as usize..)?;
let start = data.as_ptr();
let lfh = data.read_pod::<LocalFileHeader>()?;
if lfh.magic != LOCAL_FILE_HEADER_MAGIC {
return Some(Err(Error::with_invalid_data(
"local file header contains invalid magic number",
)))
}
if (lfh.flags & FLAG_ENCRYPTED) != 0 || (lfh.flags & FLAG_HAS_DATA_DESCRIPTOR) != 0 {
return Some(Err(Error::with_invalid_data(
"attempted lookup of unsupported entry",
)))
}
let path = data.read_slice(lfh.file_name_length.into())?;
let path = match bytes_to_path(path) {
Ok(path) => path,
Err(err) => return Some(Err(err.into())),
};
let _extra = data.read_slice(lfh.extra_field_length.into())?;
let data_offset = u64::from(offset)
+ u64::try_from(unsafe { data.as_ptr().offset_from(start) }).unwrap();
let data = data.read_slice(lfh.compressed_size as usize)?;
let entry = Entry {
compression: lfh.compression,
path,
data_offset,
data,
};
Some(Ok(entry))
}
entry_impl(data, offset)
.unwrap_or_else(|| Err(Error::with_invalid_data("failed to read archive entry")))
}
fn parse_next_entry(&mut self) -> Result<Entry<'archive>> {
fn entry_impl<'archive>(iter: &mut EntryIter<'archive>) -> Option<Result<Entry<'archive>>> {
let cdfh = iter.cd_record_data.read_pod::<CdFileHeader>()?;
if cdfh.magic != CD_FILE_HEADER_MAGIC {
return Some(Err(Error::with_invalid_data(
"central directory file header contains invalid magic number",
)))
}
let _name = iter
.cd_record_data
.read_slice(cdfh.file_name_length.into())?;
let _extra = iter
.cd_record_data
.read_slice(cdfh.extra_field_length.into())?;
let _comment = iter
.cd_record_data
.read_slice(cdfh.file_comment_length.into())?;
Some(EntryIter::parse_entry_at_offset(
iter.archive_data,
cdfh.offset,
))
}
entry_impl(self).unwrap_or_else(|| {
Err(Error::with_invalid_data(
"failed to read central directory record data",
))
})
}
}
impl<'archive> Iterator for EntryIter<'archive> {
    type Item = Result<Entry<'archive>>;

    /// Yield the next entry (or the error encountered while parsing it),
    /// until all central directory records have been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining_records == 0 {
            return None
        }

        // A record counts as consumed even if parsing it fails.
        self.remaining_records -= 1;
        Some(self.parse_next_entry())
    }
}
/// An opened ZIP archive, backed by memory mapped data.
#[doc(hidden)]
#[derive(Debug)]
pub struct Archive {
/// The memory mapped archive data.
mmap: Mmap,
/// The offset of the central directory from the start of the archive data,
/// as reported by the end of central directory record.
cd_offset: u32,
/// The number of records in the central directory.
cd_records: u16,
}
impl Archive {
#[cfg(test)]
pub(crate) fn open<P>(path: P) -> Result<Self>
where
P: AsRef<Path>,
{
let mmap = Mmap::builder().open(path)?;
Self::with_mmap(mmap)
}
#[doc(hidden)]
pub fn with_mmap(mmap: Mmap) -> Result<Self> {
let (cd_offset, cd_records) = Archive::find_cd(&mmap)?;
let slf = Archive {
mmap,
cd_offset,
cd_records,
};
Ok(slf)
}
fn try_parse_end_of_cd(mut data: &[u8]) -> Option<Result<(u32, u16)>> {
let eocd = data.read_pod::<EndOfCdRecord>()?;
if eocd.magic != END_OF_CD_RECORD_MAGIC {
return None
}
let () = data.ensure(eocd.comment_length.into())?;
if eocd.this_disk != 0 || eocd.cd_disk != 0 || eocd.cd_records_total != eocd.cd_records {
Some(Err(Error::with_invalid_data(
"archive is unsupported and cannot be opened",
)))
} else {
Some(Ok((eocd.cd_offset, eocd.cd_records)))
}
}
fn find_cd(data: &[u8]) -> Result<(u32, u16)> {
let end = data
.len()
.checked_sub(size_of::<EndOfCdRecord>())
.ok_or_invalid_data(|| {
"archive is too small to contain end of central directory object"
})?;
let start = end.saturating_sub(1 << 16);
for offset in (start..=end).rev() {
let result = Self::try_parse_end_of_cd(data.get(offset..).unwrap());
match result {
None => continue,
Some(Ok((cd_offset, cd_records))) => {
let cd_range = cd_offset as usize
..cd_offset as usize + usize::from(cd_records) * size_of::<CdFileHeader>();
let _cd = data.get(cd_range).ok_or_unexpected_eof(|| {
"failed to retrieve central directory entries; archive is corrupted"
})?;
return Ok((cd_offset, cd_records))
}
Some(Err(err)) => return Err(err),
}
}
Err(Error::with_invalid_data(
"archive does not contain central directory",
))
}
#[doc(hidden)]
pub fn entries(&self) -> impl Iterator<Item = Result<Entry<'_>>> {
let archive_data = &self.mmap;
let cd_record_data = self.mmap.get(self.cd_offset as usize..).unwrap();
let remaining_records = self.cd_records;
let iter = EntryIter {
archive_data,
cd_record_data,
remaining_records,
};
iter
}
#[inline]
pub(crate) fn mmap(&self) -> &Mmap {
&self.mmap
}
}
#[cfg(test)]
mod tests {
    use super::*;

    use std::io::copy;
    use std::io::Write as _;
    use std::ops::Deref as _;

    use tempfile::NamedTempFile;

    use test_log::test;

    use crate::elf::ElfParser;
    use crate::ErrorKind;


    /// Retrieve the path to the `test.zip` archive in the crate's `data`
    /// directory.
    fn test_zip() -> std::path::PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("data")
            .join("test.zip")
    }

    /// Exercise the `Debug` representation of various types.
    #[test]
    fn debug_repr() {
        let archive = Archive::open(test_zip()).unwrap();
        assert_ne!(format!("{archive:?}"), "");

        let entry = Entry {
            compression: 42,
            path: Path::new("some-entry-path.so"),
            data_offset: 56,
            data: &[1, 2, 3, 4],
        };
        let repr = format!("{entry:?}");
        let expected = r#"Entry { compression: 42, path: "some-entry-path.so", data_offset: 56, data: Some([1, 2, 3, 4]) }"#;
        assert_eq!(repr, expected);
    }

    /// Check that we can open the test archive.
    #[test]
    fn zip_opening() {
        let _archive = Archive::open(test_zip()).unwrap();
    }

    /// Check that we can iterate over the entries of the test archive.
    #[test]
    fn zip_entry_iteration() {
        let archive = Archive::open(test_zip()).unwrap();

        let mut count = 0;
        for result in archive.entries() {
            assert!(result.is_ok(), "{result:?}");
            count += 1;
        }
        assert_eq!(count, 4);
    }

    /// Check that entries can be looked up by path and that their data is
    /// reported correctly.
    #[test]
    fn zip_entry_reading() {
        let archive = Archive::open(test_zip()).unwrap();

        let missing = Path::new("non-existent");
        let result = archive
            .entries()
            .find(|candidate| candidate.as_ref().unwrap().path == missing);
        assert!(result.is_none());

        let wanted = Path::new("zip-dir/test-no-debug.bin");
        let entry = archive
            .entries()
            .find(|candidate| candidate.as_ref().unwrap().path == wanted)
            .unwrap()
            .unwrap();
        assert_eq!(entry.compression, 0);
        assert_eq!(entry.path, Path::new("zip-dir/test-no-debug.bin"));

        // The reported data offset has to identify the very bytes that the
        // entry's data refers to.
        let data_start = entry.data_offset as usize;
        let data_end = entry.data_offset as usize + entry.data.len();
        assert_eq!(entry.data, archive.mmap.get(data_start..data_end).unwrap());

        // The entry is stored uncompressed, so once written to a file its
        // contents should be a usable ELF binary.
        let mut file = NamedTempFile::new().unwrap();
        let () = file.write_all(entry.data).unwrap();

        let module = file.path().as_os_str().to_os_string();
        let elf = ElfParser::from_file(file.as_file(), module).unwrap();
        assert!(elf.find_section(".text").is_ok());
    }

    /// Check that `Archive` creation fails for a truncated archive.
    #[test]
    fn zip_creation_corrupted() {
        let archive = Archive::open(test_zip()).unwrap();

        // Cut the archive off one byte short of the end of the central
        // directory's file headers.
        let truncated_len = archive.cd_offset as usize
            + usize::from(archive.cd_records) * size_of::<CdFileHeader>()
            - 1;
        let mut partial_data = archive.mmap.deref().get(..truncated_len).unwrap();

        let mut corrupted_zip = NamedTempFile::new().unwrap();
        let _cnt = copy(&mut partial_data, &mut corrupted_zip).unwrap();

        let err = Archive::open(corrupted_zip.path()).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData, "{err}");
    }
}