use crate::compression::CompressionMethod;
use crate::cp437::FromCp437;
use crate::datetime::DateTime;
use crate::extra_fields::AexEncryption;
use crate::extra_fields::UnicodeExtraField;
use crate::extra_fields::Zip64ExtendedInformation;
use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs, UsedExtraField};
use crate::format::flags::ZipFlags;
use crate::result::{ZipError, ZipResult, invalid};
use crate::spec::{CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, ZipCentralEntryBlock};
use crate::types::{System, ZipFileData};
use crate::unstable::LittleEndianReadExt;
use indexmap::IndexMap;
use std::ffi::OsStr;
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::path::Path;
use std::sync::{Arc, OnceLock};
mod config;
pub use config::{ArchiveOffset, Config};
pub(crate) mod stream;
pub use stream::{
read_zipfile_from_stream, read_zipfile_from_stream_with_compressed_size,
read_zipfile_from_stream_with_options,
};
pub(crate) mod magic_finder;
pub(crate) mod readers;
pub(crate) mod zipfile;
pub use zipfile::{ZipFile, ZipFileSeek};
pub(crate) mod zip_archive;
pub use zip_archive::{ZipArchive, ZipArchiveMetadata};
#[cfg(feature = "aes-crypto")]
pub use crate::aes::AesInfo;
/// Creates the directory `outpath`, including any missing ancestors, and on
/// Unix additionally ORs the owner read/write/execute bits (`0o700`) into the
/// mode of `outpath` itself.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directory, reading its
/// metadata, or updating its permissions.
pub(crate) fn make_writable_dir_all<T: AsRef<Path>>(outpath: T) -> Result<(), ZipError> {
    let dir = outpath.as_ref();
    std::fs::create_dir_all(dir)?;
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Read the mode first so that the existing bits are preserved and only
        // the owner bits are added on top.
        let current_mode = std::fs::metadata(dir)?.permissions().mode();
        let owner_accessible = std::fs::Permissions::from_mode(0o700 | current_mode);
        std::fs::set_permissions(dir, owner_accessible)?;
    }
    Ok(())
}
#[cfg(unix)]
pub(crate) fn make_symlink_impl<T>(
outpath: &Path,
target_str: &str,
_existing_files: &IndexMap<Box<[u8]>, T>,
) -> ZipResult<()> {
std::os::unix::fs::symlink(Path::new(&target_str), outpath)?;
Ok(())
}
/// Windows implementation: creates a symbolic link at `outpath` pointing to
/// `target_str`.
///
/// Windows distinguishes directory links from file links, so the kind of the
/// target has to be determined first. The target is treated as a directory
/// when the archive itself lists it as a directory entry, or otherwise when it
/// already exists on disk as a directory. In every other case a file link is
/// created.
///
/// # Errors
///
/// Returns the I/O error reported while creating the link.
#[cfg(windows)]
pub(crate) fn make_symlink_impl<T>(
    outpath: &Path,
    target_str: &str,
    existing_files: &IndexMap<Box<[u8]>, T>,
) -> ZipResult<()> {
    use crate::spec::is_dir;

    let target = Path::new(OsStr::new(&target_str));
    let listed_as_dir_in_archive =
        is_dir(target_str.as_bytes()) && existing_files.contains_key(target_str.as_bytes());
    // Short-circuits: the file system is only consulted when the archive
    // does not already answer the question.
    let target_is_dir =
        listed_as_dir_in_archive || std::fs::metadata(target).is_ok_and(|meta| meta.is_dir());

    let created = if target_is_dir {
        std::os::windows::fs::symlink_dir(target, outpath)
    } else {
        std::os::windows::fs::symlink_file(target, outpath)
    };
    created?;
    Ok(())
}
/// Creates a symbolic link at `outpath` whose target is the raw bytes
/// `target`, delegating to the platform-specific `make_symlink_impl`.
///
/// # Errors
///
/// Returns an invalid-archive error when `target` is not valid UTF-8, or
/// whatever error the platform implementation reports.
#[cfg(any(windows, unix))]
pub(crate) fn make_symlink<T>(
    outpath: &Path,
    target: &[u8],
    #[cfg_attr(not(any(windows, unix)), allow(unused))] existing_files: &IndexMap<Box<[u8]>, T>,
) -> ZipResult<()> {
    match std::str::from_utf8(target) {
        Ok(target_str) => make_symlink_impl(outpath, target_str, existing_files),
        Err(_) => Err(invalid!("Invalid UTF-8 as symlink target")),
    }
}
/// Fallback for platforms that are neither Windows nor Unix: instead of a real
/// symbolic link, a regular file containing the link target bytes is written
/// to `outpath`.
///
/// # Errors
///
/// Returns an invalid-archive error when `target` is not valid UTF-8, or the
/// I/O error raised while creating or writing the file.
#[cfg(not(any(windows, unix)))]
pub(crate) fn make_symlink<T>(
    outpath: &Path,
    target: &[u8],
    #[cfg_attr(not(any(windows, unix)), allow(unused))] existing_files: &IndexMap<Box<[u8]>, T>,
) -> ZipResult<()> {
    if std::str::from_utf8(target).is_err() {
        return Err(invalid!("Invalid UTF-8 as symlink target"));
    }
    let mut placeholder = std::fs::File::create(outpath)?;
    placeholder.write_all(target)?;
    Ok(())
}
/// Where the central directory of an archive lives and how many entries it
/// holds, as derived from a `CentralDirectoryEndInfo`.
#[derive(Debug)]
pub(crate) struct CentralDirectoryInfo {
/// Offset of the archive within the underlying stream; it is added to the
/// recorded central directory offset to obtain `directory_start`.
pub(crate) archive_offset: u64,
/// Central directory offset taken from the end-of-central-directory record
/// plus `archive_offset`.
pub(crate) directory_start: u64,
/// Number of entries declared by the end-of-central-directory record.
pub(crate) number_of_files: usize,
/// Disk number field copied from the end-of-central-directory record.
pub(crate) disk_number: u32,
/// "Disk with central directory" field copied from the end-of-central-directory
/// record.
pub(crate) disk_with_central_directory: u32,
}
/// Derives the central directory location from the end-of-central-directory
/// information, preferring the ZIP64 record when one is present.
impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
    type Error = ZipError;

    /// # Errors
    ///
    /// Returns an invalid-archive error when:
    /// * the ZIP64 record claims more files on this disk than in the whole
    ///   archive,
    /// * the ZIP64 file count does not fit into `usize` on this platform, or
    /// * adding the archive offset to the central directory offset overflows.
    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
            match &value.eocd64 {
                Some(DataAndPosition { data: eocd64, .. }) => {
                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
                        return Err(invalid!(
                            "ZIP64 footer indicates more files on this disk than in the whole archive"
                        ));
                    }
                    // A plain `as usize` would silently truncate the 64-bit
                    // count on 32-bit targets; reject such archives instead.
                    let number_of_files =
                        usize::try_from(eocd64.number_of_files).map_err(|_| {
                            invalid!(
                                "ZIP64 footer indicates more files than this platform can address"
                            )
                        })?;
                    (
                        eocd64.central_directory_offset,
                        number_of_files,
                        eocd64.disk_number,
                        eocd64.disk_with_central_directory,
                    )
                }
                _ => (
                    u64::from(value.eocd.data.central_directory_offset),
                    value.eocd.data.number_of_files_on_this_disk as usize,
                    u32::from(value.eocd.data.disk_number),
                    u32::from(value.eocd.data.disk_with_central_directory),
                ),
            };
        // The recorded offset is relative to the start of the archive, which
        // may itself be embedded at `archive_offset` in the stream.
        let directory_start = relative_cd_offset
            .checked_add(value.archive_offset)
            .ok_or(invalid!("Invalid central directory size or offset"))?;
        Ok(Self {
            archive_offset: value.archive_offset,
            directory_start,
            number_of_files,
            disk_number,
            disk_with_central_directory,
        })
    }
}
/// Unix permission bits collected during extraction, keyed by output path.
///
/// The paths are kept in a sorted map so that they can later be walked in
/// reverse order.
#[cfg(unix)]
#[derive(Default, Debug)]
struct UnixFileModes {
    map: std::collections::BTreeMap<std::path::PathBuf, u32>,
}

#[cfg(unix)]
impl UnixFileModes {
    /// Records `mode` for `path`.
    ///
    /// In debug builds this asserts that no mode had been recorded for the
    /// same path before.
    #[cfg_attr(not(debug_assertions), allow(unused))]
    pub fn add_mode(&mut self, path: std::path::PathBuf, mode: u32) {
        let previous_mode = self.map.insert(path, mode);
        debug_assert_eq!(previous_mode, None);
    }

    /// Consumes the collection and yields `(path, permissions)` pairs in
    /// descending path order, so a path nested inside a directory is yielded
    /// before that directory.
    pub fn all_perms_with_children_first(
        self,
    ) -> impl IntoIterator<Item = (std::path::PathBuf, std::fs::Permissions)> {
        use std::os::unix::fs::PermissionsExt;

        let Self { map } = self;
        map.into_iter().rev().map(|(path, mode)| {
            let perms = std::fs::Permissions::from_mode(mode);
            (path, perms)
        })
    }
}
impl<R: Read + Seek> ZipArchive<R> {
    /// Copies everything that precedes this archive's central directory into
    /// `w` and returns a copy of the entry table adjusted for its new location.
    ///
    /// For every returned entry, `header_start` (and `data_start`, when it was
    /// already known) is shifted by the position `w` had when this method was
    /// called, and `central_header_start` is reset to `0`.
    ///
    /// An archive without entries copies nothing and returns an empty map.
    ///
    /// # Errors
    ///
    /// Returns an invalid-archive error when a shifted offset would overflow
    /// `u64`, or any I/O error from the reader or writer.
    pub(crate) fn merge_contents<W: Write + Seek>(
        &mut self,
        mut w: W,
    ) -> ZipResult<IndexMap<Box<[u8]>, ZipFileData>> {
        if self.shared.files.is_empty() {
            return Ok(IndexMap::new());
        }
        let mut new_files = self.shared.files.clone();
        let first_new_file_header_start = w.stream_position()?;
        new_files.values_mut().try_for_each(|f| {
            f.header_start = f
                .header_start
                .checked_add(first_new_file_header_start)
                .ok_or(invalid!(
                    "new header start from merge would have been too large"
                ))?;
            f.central_header_start = 0;
            // `take` empties the cell, so the shifted value can be stored in it
            // again right away.
            if let Some(old_data_start) = f.data_start.take() {
                let new_data_start = old_data_start
                    .checked_add(first_new_file_header_start)
                    .ok_or(invalid!(
                        "new data start from merge would have been too large"
                    ))?;
                f.data_start.get_or_init(|| new_data_start);
            }
            Ok::<_, ZipError>(())
        })?;
        // Copy the raw bytes from the start of the stream up to (but not
        // including) the central directory.
        self.reader.rewind()?;
        let length_to_read = self.shared.dir_start;
        let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
        io::copy(&mut limited_raw, &mut w)?;
        Ok(new_files)
    }

    /// Extracts every entry of the archive into `directory`, creating the
    /// directory first if it does not exist.
    ///
    /// # Errors
    ///
    /// Returns the first error encountered while reading an entry or writing
    /// it to disk; entries extracted before the failure are left in place.
    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
        self.extract_internal(directory, None::<fn(&Path) -> bool>)
    }

    /// Like [`ZipArchive::extract`], but first asks `self.root_dir` for the
    /// archive's root directory, using `root_dir_filter`, and hands the result
    /// to the path preparation of every entry.
    ///
    /// NOTE(review): the name suggests that a single top-level directory is
    /// stripped from the extracted paths; the stripping itself happens outside
    /// this section of the file, so confirm against `root_dir` and
    /// `safe_prepare_path`.
    ///
    /// # Errors
    ///
    /// Same as [`ZipArchive::extract`], plus any error raised while
    /// determining the root directory.
    pub fn extract_unwrapped_root_dir<P: AsRef<Path>>(
        &mut self,
        directory: P,
        root_dir_filter: impl RootDirFilter,
    ) -> ZipResult<()> {
        self.extract_internal(directory, Some(root_dir_filter))
    }

    /// Shared implementation of the two public extraction entry points.
    ///
    /// Symlinks (on Unix and Windows) and directories are created as they are
    /// encountered. Regular files are written out, and on Unix their recorded
    /// modes are applied only after every entry has been written, nested
    /// paths first.
    fn extract_internal<P: AsRef<Path>>(
        &mut self,
        directory: P,
        root_dir_filter: Option<impl RootDirFilter>,
    ) -> ZipResult<()> {
        use std::fs;
        fs::create_dir_all(&directory)?;
        let directory = directory.as_ref().canonicalize()?;
        let root_dir = root_dir_filter
            .and_then(|filter| {
                self.root_dir(&filter)
                    .transpose()
                    .map(|root_dir| root_dir.map(|root_dir| (root_dir, filter)))
            })
            .transpose()?;
        let root_dir = root_dir
            .as_ref()
            .map(|(root_dir, filter)| {
                crate::path::simplified_components(root_dir)
                    .ok_or_else(|| {
                        debug_assert!(false, "Invalid root dir path");
                        invalid!("Invalid root dir path")
                    })
                    .map(|root_dir| (root_dir, filter))
            })
            .transpose()?;
        #[cfg(unix)]
        let mut files_by_unix_mode = UnixFileModes::default();
        for i in 0..self.len() {
            let mut file = self.by_index(i)?;
            let mut outpath = directory.clone();
            file.safe_prepare_path(directory.as_ref(), &mut outpath, root_dir.as_ref())?;

            // Only the symlink branch is platform specific. It must not share a
            // `cfg`-gated `if`/`else if` chain with the directory branch, or
            // other targets would lose directory handling as well and try to
            // `File::create` a directory path.
            #[cfg(any(unix, windows))]
            if file.is_symlink() {
                // Do not preallocate from `file.size()`: that value is
                // presumably the size declared by the archive, and a forged
                // value would request an enormous (or overflowing) allocation.
                let mut target = Vec::new();
                file.read_to_end(&mut target)?;
                drop(file);
                make_symlink(&outpath, &target, &self.shared.files)?;
                continue;
            }
            if file.is_dir() {
                make_writable_dir_all(&outpath)?;
                continue;
            }

            let mut outfile = fs::File::create(&outpath)?;
            io::copy(&mut file, &mut outfile)?;
            // Modes are applied after the loop so that a restrictive mode
            // cannot get in the way of writing the remaining entries.
            #[cfg(unix)]
            if let Some(mode) = file.unix_mode() {
                files_by_unix_mode.add_mode(outpath, mode);
            }
            #[cfg(feature = "chrono")]
            if let Some(last_modified) = file.last_modified()
                && let Some(t) = last_modified.datetime_to_systemtime()
            {
                outfile.set_modified(t)?;
            }
        }
        #[cfg(unix)]
        for (path, perms) in files_by_unix_mode.all_perms_with_children_first() {
            std::fs::set_permissions(path, perms)?;
        }
        Ok(())
    }
}
/// Parses one central directory entry at the reader's current position.
///
/// Returns the entry's metadata together with its raw (undecoded) file name.
/// On success the reader is left immediately after the entry's
/// variable-length fields, i.e. at the start of the next entry.
///
/// # Errors
///
/// Returns any error raised while querying the stream position, parsing the
/// fixed-size header block, or reading and interpreting the variable-length
/// fields.
pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
    reader: &mut R,
    central_directory: &CentralDirectoryInfo,
) -> ZipResult<(ZipFileData, Box<[u8]>)> {
    let central_header_start = reader.stream_position()?;
    let block = ZipCentralEntryBlock::parse(reader)?;
    // No seek back is needed afterwards: parsing only reads forward, and an
    // explicit seek to the current position would merely force buffered
    // readers to throw away their buffer for every entry.
    let (file, file_name_raw) = central_header_to_zip_file_inner(
        reader,
        central_directory.archive_offset,
        central_header_start,
        block,
    )?;
    Ok((file, file_name_raw.into()))
}
/// Reads exactly `len` bytes from `reader` into a freshly allocated buffer.
///
/// # Errors
///
/// A premature end of input is reported as an invalid-archive error; every
/// other I/O error is passed through.
#[inline]
fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> ZipResult<Vec<u8>> {
    let mut data = vec![0; len];
    match reader.read_exact(&mut data) {
        Ok(()) => Ok(data),
        Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => Err(invalid!(
            "Variable-length field extends beyond file boundary"
        )),
        Err(e) => Err(e.into()),
    }
}
/// Turns an already parsed fixed-size central header `block` into a
/// [`ZipFileData`], reading the variable-length tail of the entry from
/// `reader`.
///
/// The tail is read in the order file name, extra field, file comment. The
/// comment is decoded as (lossy) UTF-8 when the language-encoding flag is set
/// and as CP437 otherwise. The file name is returned raw, after the extra
/// fields have had the chance to replace it.
///
/// `archive_offset` is added to the entry's header offset after the extra
/// fields have been processed, because they may overwrite that offset.
///
/// # Errors
///
/// Returns an error when a variable-length field is truncated, the comment
/// cannot be decoded, an extra field is malformed, or the final header offset
/// overflows.
fn central_header_to_zip_file_inner<R: Read>(
    reader: &mut R,
    archive_offset: u64,
    central_header_start: u64,
    block: ZipCentralEntryBlock,
) -> ZipResult<(ZipFileData, Vec<u8>)> {
    let ZipCentralEntryBlock {
        version_made_by,
        flags,
        compression_method,
        last_mod_time,
        last_mod_date,
        crc32,
        compressed_size,
        uncompressed_size,
        file_name_length,
        extra_field_length,
        file_comment_length,
        external_file_attributes,
        offset,
        ..
    } = block;

    // The three reads must stay in this order: it is the on-disk layout.
    let mut file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
    let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
    let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;

    let file_comment: Box<str> = if ZipFlags::matching(flags, ZipFlags::LanguageEncoding) {
        String::from_utf8_lossy(&file_comment_raw).into()
    } else {
        file_comment_raw.from_cp437()?.into()
    };

    let (version_made_by, system) = System::extract_bytes(version_made_by);
    let mut file = ZipFileData {
        system,
        version_made_by,
        compression_method: CompressionMethod::parse_from_u16(compression_method),
        last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
        crc32,
        compressed_size: compressed_size.into(),
        uncompressed_size: uncompressed_size.into(),
        flags,
        extra_field: Some(Arc::from(extra_field)),
        central_extra_field: None,
        file_comment,
        header_start: offset.into(),
        extra_data_start: None,
        central_header_start,
        data_start: OnceLock::new(),
        external_attributes: external_file_attributes,
        large_file: false,
        aes_mode: None,
        aes_extra_data_start: 0,
        extra_fields: Vec::new(),
    };
    parse_extra_field(&mut file, &mut file_name_raw)?;

    // Rebase the header offset only now, since the extra fields may have
    // replaced it.
    let Some(absolute_header_start) = file.header_start.checked_add(archive_offset) else {
        return Err(invalid!("Archive header is too large"));
    };
    file.header_start = absolute_header_start;
    Ok((file, file_name_raw))
}
/// Interprets the extra-field data attached to `file` (both `extra_field` and
/// `central_extra_field`) and applies the recognised fields to `file` and
/// `file_name_raw`.
///
/// Fields for which `parse_single_extra_field` returns `true` are dropped from
/// the stored extra data. A buffer is only replaced when at least one of its
/// fields was dropped; otherwise the original bytes are kept untouched.
///
/// # Errors
///
/// Propagates any error returned while parsing an individual field.
pub(crate) fn parse_extra_field(
    file: &mut ZipFileData,
    file_name_raw: &mut Vec<u8>,
) -> ZipResult<()> {
    // Work on clones of the shared buffers so that `file` stays mutably
    // borrowable while the individual fields are applied to it.
    let mut extra_field = file.extra_field.clone();
    let mut central_extra_field = file.central_extra_field.clone();
    for field_group in [&mut extra_field, &mut central_extra_field] {
        let Some(group) = field_group else {
            continue;
        };
        let raw: &[u8] = &**group;
        let mut modified = false;
        let mut processed_extra_field = Vec::with_capacity(raw.len());
        let mut reader = io::Cursor::new(raw);
        let mut position = reader.position();
        while position < raw.len() as u64 {
            let field_start = position;
            let remove =
                parse_single_extra_field(file, &mut reader, field_start, false, file_name_raw)?;
            position = reader.position();
            if remove {
                modified = true;
            } else {
                // The parser only reads forward through the cursor, so the
                // bytes it consumed are exactly `raw[field_start..position]`.
                // Copy them straight from the buffer rather than seeking back
                // and re-reading them into a scratch allocation.
                processed_extra_field
                    .extend_from_slice(&raw[field_start as usize..position as usize]);
            }
        }
        if modified {
            *field_group = Some(Arc::from(processed_extra_field.into_boxed_slice()));
        }
    }
    file.extra_field = extra_field;
    file.central_extra_field = central_extra_field;
    Ok(())
}
/// Reads one extra field (2-byte kind, 2-byte length, payload) from `reader`
/// and applies it to `file` / `file_name_raw`.
///
/// Returns `Ok(true)` when the field should be removed from the stored extra
/// data (the ZIP64 extended information field) and `Ok(false)` otherwise,
/// including when the input ends before a complete kind could be read.
///
/// `bytes_already_read` is recorded as the start of the AES extra data when an
/// AE-x encryption field is found. With `disallow_zip64` set, a ZIP64 field is
/// rejected instead of parsed.
///
/// # Errors
///
/// Returns an invalid-archive error for a truncated header of a recognised
/// field, for truncated content of a skipped field, and for a ZIP64 field when
/// `disallow_zip64` is set. Errors from the individual field parsers and other
/// I/O errors are propagated, except as noted in the body for unrecognised
/// fields.
pub(crate) fn parse_single_extra_field<R: Read>(
    file: &mut ZipFileData,
    reader: &mut R,
    bytes_already_read: u64,
    disallow_zip64: bool,
    file_name_raw: &mut Vec<u8>,
) -> ZipResult<bool> {
    let kind = match reader.read_u16_le() {
        Ok(kind) => kind,
        Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(false),
        Err(e) => return Err(e.into()),
    };
    let decoded_extra_field = UsedExtraField::try_from(kind);

    let len = match reader.read_u16_le() {
        Ok(len) => len,
        Err(e) => {
            let truncated = e.kind() == io::ErrorKind::UnexpectedEof;
            match decoded_extra_field {
                Ok(known_field) if truncated => {
                    return Err(invalid!("Extra field {} header truncated", known_field));
                }
                Ok(_) => return Err(e.into()),
                Err(()) => {
                    // Unrecognised field: a missing length is tolerated. For
                    // any other read error the rest of the input is drained
                    // before giving up on this field.
                    if !truncated {
                        let mut buf = [0u8; 2048];
                        while reader.read(&mut buf)? != 0 {}
                    }
                    return Ok(false);
                }
            }
        }
    };

    let remove_field = match decoded_extra_field {
        Ok(UsedExtraField::Zip64ExtendedInfo) => {
            if disallow_zip64 {
                return Err(invalid!("Can't write a custom field using the ZIP64 ID"));
            }
            file.large_file = true;
            Zip64ExtendedInformation::parse(
                reader,
                len,
                &mut file.uncompressed_size,
                &mut file.compressed_size,
                &mut file.header_start,
            )?;
            true
        }
        Ok(UsedExtraField::Ntfs) => {
            let ntfs = Ntfs::try_from_reader(reader, len)?;
            file.extra_fields.push(ExtraField::Ntfs(ntfs));
            false
        }
        Ok(UsedExtraField::AeXEncryption) => {
            let (aes_options, inner_compression_method) = AexEncryption::parse(reader, len)?;
            file.aes_mode = Some(aes_options);
            file.compression_method = inner_compression_method;
            file.aes_extra_data_start = bytes_already_read;
            false
        }
        Ok(UsedExtraField::ExtendedTimestamp) => {
            let timestamp = ExtendedTimestamp::try_from_reader(reader, len)?;
            file.extra_fields
                .push(ExtraField::ExtendedTimestamp(timestamp));
            false
        }
        Ok(UsedExtraField::UnicodeComment) => {
            let unicode = UnicodeExtraField::try_from_reader(reader, len)?;
            let comment_bytes = unicode
                .unwrap_valid(file.file_comment.as_bytes())?
                .into_vec();
            file.file_comment = String::from_utf8(comment_bytes)?.into();
            false
        }
        Ok(UsedExtraField::UnicodePath) => {
            let unicode = UnicodeExtraField::try_from_reader(reader, len)?;
            let file_name = unicode.unwrap_valid(file_name_raw)?;
            *file_name_raw = file_name.into_vec();
            // The replacement name is marked as UTF-8 via the language-encoding flag.
            file.flags |= ZipFlags::LanguageEncoding.as_u16();
            false
        }
        _ => {
            // Any other field is skipped by consuming its payload.
            let mut skipped = vec![0u8; len as usize];
            match reader.read_exact(&mut skipped) {
                Ok(()) => {}
                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
                    return Err(invalid!("Extra field content truncated"));
                }
                Err(e) => return Err(e.into()),
            }
            false
        }
    };
    Ok(remove_field)
}
/// Implemented by types that can hand out the [`ZipFileData`] metadata they
/// are associated with.
pub trait HasZipMetadata {
/// Returns a reference to the associated metadata.
fn get_metadata(&self) -> &ZipFileData;
}
/// Builder-style options for reading a single entry.
///
/// Every option starts out unset (`None` / `false`); each setter consumes the
/// builder and returns it with one option changed.
///
/// NOTE(review): the code that consumes these options lives outside this
/// section of the file, so the per-option notes below describe what is stored
/// and hedge on how it is used.
#[derive(Default)]
#[non_exhaustive]
pub struct ZipReadOptions<'a> {
    // Password bytes, presumably used to decrypt encrypted entries.
    password: Option<&'a [u8]>,
    // Presumably makes the reader disregard the entry's encryption flag.
    ignore_encryption_flag: bool,
    // Presumably disables CRC-32 verification.
    ignore_crc: bool,
    // Value to use in place of the recorded compressed size, if set.
    force_compressed_size: Option<u64>,
    // Value to use in place of the recorded uncompressed size, if set.
    force_uncompressed_size: Option<u64>,
    // Value to use in place of the recorded CRC-32, if set.
    force_crc: Option<u32>,
}

impl<'a> ZipReadOptions<'a> {
    /// Creates a set of options with nothing overridden.
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Stores the password to use, or clears it with `None`.
    #[must_use]
    pub fn password(self, password: Option<&'a [u8]>) -> Self {
        Self { password, ..self }
    }

    /// Stores whether the encryption flag should be ignored.
    #[must_use]
    pub fn ignore_encryption_flag(self, ignore: bool) -> Self {
        Self {
            ignore_encryption_flag: ignore,
            ..self
        }
    }

    /// Stores whether the CRC-32 should be ignored.
    #[must_use]
    pub fn ignore_crc32(self, should_ignore: bool) -> Self {
        Self {
            ignore_crc: should_ignore,
            ..self
        }
    }

    /// Stores an override for the compressed size.
    #[must_use]
    pub fn override_compressed_size(self, comp_size: u64) -> Self {
        Self {
            force_compressed_size: Some(comp_size),
            ..self
        }
    }

    /// Stores an override for the uncompressed size.
    #[must_use]
    pub fn override_uncompressed_size(self, uncomp_size: u64) -> Self {
        Self {
            force_uncompressed_size: Some(uncomp_size),
            ..self
        }
    }

    /// Stores an override for the CRC-32.
    #[must_use]
    pub fn override_crc(self, crc: u32) -> Self {
        Self {
            force_crc: Some(crc),
            ..self
        }
    }
}
/// A predicate over paths, accepted by `ZipArchive::extract_unwrapped_root_dir`
/// and forwarded to the archive's root-directory detection.
///
/// NOTE(review): judging by `root_dir_common_filter`, returning `false`
/// presumably means "ignore this path when looking for the root directory";
/// confirm against the `root_dir` implementation.
pub trait RootDirFilter: Fn(&Path) -> bool {}
// Blanket implementation: every `Fn(&Path) -> bool` is a `RootDirFilter`.
impl<F: Fn(&Path) -> bool> RootDirFilter for F {}
/// A ready-made path filter that rejects common operating-system clutter.
///
/// Returns `false` for:
/// * any path whose first component is `__MACOSX`, and
/// * a single-component path named `.DS_Store` or `Thumbs.db`.
///
/// Every other path, including the empty path, yields `true`.
#[must_use]
pub fn root_dir_common_filter(path: &Path) -> bool {
    const COMMON_FILTER_ROOT_FILES: &[&str] = &[".DS_Store", "Thumbs.db"];

    let inside_macosx_dir = path.starts_with("__MACOSX");

    // Exactly one component means the entry sits at the top level.
    let mut parts = path.components();
    let is_top_level = parts.next().is_some() && parts.next().is_none();
    let is_clutter_file = match path.file_name() {
        Some(file_name) => COMMON_FILTER_ROOT_FILES
            .iter()
            .any(|clutter| OsStr::new(clutter) == file_name),
        None => false,
    };

    !(inside_macosx_dir || (is_top_level && is_clutter_file))
}