use crate::{Error, Result, codec};
use std::io::{Cursor, Read, Seek, SeekFrom};
use super::SIGNATURE_HEADER_SIZE;
use super::files::{ArchiveEntry, FilesInfo};
use super::header::StartHeader;
use super::property_id;
use super::reader::read_u8;
use super::streams::{Folder, PackInfo, ResourceLimits, SubStreamsInfo, UnpackInfo};
/// In-memory result of parsing an archive header.
///
/// Every section is optional: a field stays `None` (or `false`) when the
/// corresponding property never appeared in the parsed header data.
#[derive(Debug, Clone, Default)]
pub struct ArchiveHeader {
/// Pack stream information, set when a `PACK_INFO` property is parsed.
pub pack_info: Option<PackInfo>,
/// Folder/coder information, set when an `UNPACK_INFO` property is parsed.
pub unpack_info: Option<UnpackInfo>,
/// Sub-stream information, set when a `SUBSTREAMS_INFO` property is parsed.
pub substreams_info: Option<SubStreamsInfo>,
/// File entries, set when a `FILES_INFO` property is parsed.
pub files_info: Option<FilesInfo>,
/// `true` when the header was stored as an encoded header whose folder
/// contains an AES coder; `false` otherwise (including the default).
pub header_encrypted: bool,
}
impl ArchiveHeader {
    /// Returns the parsed file entries, or an empty slice when the header
    /// carried no files info.
    pub fn entries(&self) -> &[ArchiveEntry] {
        match self.files_info.as_ref() {
            Some(info) => &info.entries,
            None => &[],
        }
    }

    /// Returns the parsed folders, or an empty slice when the header
    /// carried no unpack info.
    pub fn folders(&self) -> &[Folder] {
        match self.unpack_info.as_ref() {
            Some(info) => &info.folders,
            None => &[],
        }
    }
}
/// Stateful parser for archive headers.
///
/// The parser carries the resource limits it enforces plus running counters
/// that are updated while header bytes are consumed.
#[derive(Debug)]
pub struct HeaderParser {
// Limits handed to the section parsers and used for the byte-limit check.
limits: ResourceLimits,
// Number of marker / property-ID bytes this parser has consumed so far;
// compared against `limits.max_header_bytes`.
bytes_read: u64,
// Encoded-header nesting counter, checked against `max_recursion_depth`.
recursion_depth: u32,
// Largest value `recursion_depth` may reach before parsing is rejected.
max_recursion_depth: u32,
// Optional password used when an encoded header has an AES coder.
#[cfg(feature = "aes")]
password: Option<crate::crypto::Password>,
}
impl HeaderParser {
/// Creates a parser that enforces the default [`ResourceLimits`].
pub fn new() -> Self {
    let limits = ResourceLimits::default();
    Self::with_limits(limits)
}
/// Creates a parser that enforces the given resource limits.
///
/// All counters start at zero, the encoded-header depth limit is 4, and
/// (with the `aes` feature) no password is set.
pub fn with_limits(limits: ResourceLimits) -> Self {
    // Maximum encoded-header nesting accepted by a freshly built parser.
    const DEFAULT_MAX_RECURSION_DEPTH: u32 = 4;

    Self {
        limits,
        bytes_read: 0,
        recursion_depth: 0,
        max_recursion_depth: DEFAULT_MAX_RECURSION_DEPTH,
        #[cfg(feature = "aes")]
        password: None,
    }
}
/// Builder-style setter: returns the parser with its password replaced by
/// `password` (`None` clears it).
#[cfg(feature = "aes")]
pub fn with_password(self, password: Option<crate::crypto::Password>) -> Self {
    Self { password, ..self }
}
pub fn parse_header<R: Read>(&mut self, r: &mut R) -> Result<ArchiveHeader> {
let first_byte = read_u8(r)?;
self.bytes_read += 1;
match first_byte {
property_id::HEADER => self.parse_main_header(r),
property_id::ENCODED_HEADER => Err(Error::UnsupportedFeature {
feature: "encoded headers require seekable reader - use parse_header_with_seek",
}),
_ => Err(Error::InvalidFormat(format!(
"expected header marker, got {:#x}",
first_byte
))),
}
}
pub fn parse_header_with_seek<R: Read + Seek>(
&mut self,
r: &mut R,
archive_data_start: u64,
) -> Result<ArchiveHeader> {
let first_byte = read_u8(r)?;
self.bytes_read += 1;
match first_byte {
property_id::HEADER => self.parse_main_header(r),
property_id::ENCODED_HEADER => self.parse_encoded_header(r, archive_data_start),
_ => Err(Error::InvalidFormat(format!(
"expected header marker, got {:#x}",
first_byte
))),
}
}
/// Parses the body of a plain header (the bytes after the `HEADER` marker).
///
/// Properties are consumed one ID at a time until `END`. Each ID byte is
/// counted and checked against the byte limit before it is interpreted.
///
/// # Errors
/// Propagates read, limit and sub-parser errors; returns
/// `Error::CorruptHeader` for a property ID that is not expected here.
fn parse_main_header<R: Read>(&mut self, r: &mut R) -> Result<ArchiveHeader> {
    let mut parsed = ArchiveHeader::default();
    loop {
        let id = read_u8(r)?;
        self.bytes_read += 1;
        self.check_byte_limit()?;

        if id == property_id::END {
            return Ok(parsed);
        }

        if id == property_id::MAIN_STREAMS_INFO {
            self.parse_streams_info(r, &mut parsed)?;
        } else if id == property_id::FILES_INFO {
            // File sizes/CRCs come from the stream sections parsed so far.
            let (sizes, crcs) = self.get_file_sizes_and_crcs(&parsed);
            let files = FilesInfo::parse(r, &sizes, &crcs, &self.limits)?;
            parsed.files_info = Some(files);
        } else {
            return Err(Error::CorruptHeader {
                offset: self.bytes_read,
                reason: format!("unexpected property ID in header: {:#x}", id),
            });
        }
    }
}
/// Parses a streams-info section into `header`.
///
/// Consumes property IDs until `END`, filling `pack_info`, `unpack_info`
/// and `substreams_info` as the matching properties appear. Each ID byte is
/// counted and checked against the byte limit before it is interpreted.
///
/// # Errors
/// Propagates read, limit and sub-parser errors; returns
/// `Error::CorruptHeader` for a property ID that is not expected here.
fn parse_streams_info<R: Read>(&mut self, r: &mut R, header: &mut ArchiveHeader) -> Result<()> {
    loop {
        let id = read_u8(r)?;
        self.bytes_read += 1;
        self.check_byte_limit()?;

        if id == property_id::END {
            return Ok(());
        }

        if id == property_id::PACK_INFO {
            let pack = PackInfo::parse(r, &self.limits)?;
            header.pack_info = Some(pack);
        } else if id == property_id::UNPACK_INFO {
            let unpack = UnpackInfo::parse(r, &self.limits)?;
            header.unpack_info = Some(unpack);
        } else if id == property_id::SUBSTREAMS_INFO {
            // Sub-streams are parsed against whatever folders are known so
            // far; an empty slice is used when no unpack info was seen yet.
            let folders: &[Folder] = match header.unpack_info.as_ref() {
                Some(info) => &info.folders,
                None => &[],
            };
            let substreams = SubStreamsInfo::parse(r, folders, &self.limits)?;
            header.substreams_info = Some(substreams);
        } else {
            return Err(Error::CorruptHeader {
                offset: self.bytes_read,
                reason: format!("unexpected property ID in streams info: {:#x}", id),
            });
        }
    }
}
fn parse_encoded_header<R: Read + Seek>(
&mut self,
r: &mut R,
archive_data_start: u64,
) -> Result<ArchiveHeader> {
self.recursion_depth += 1;
if self.recursion_depth > self.max_recursion_depth {
return Err(Error::ResourceLimitExceeded(
"maximum encoded header recursion depth exceeded".into(),
));
}
let mut streams_header = ArchiveHeader::default();
self.parse_streams_info(r, &mut streams_header)?;
let current_pos = r.stream_position()?;
let header_encrypted = Self::folder_uses_encryption(&streams_header);
let data_base = if header_encrypted {
current_pos
} else {
archive_data_start
};
let decompressed = self.decompress_header(r, &streams_header, data_base)?;
if decompressed.is_empty() {
return Err(Error::InvalidFormat("empty decompressed header".into()));
}
let first_byte = decompressed[0];
match first_byte {
property_id::HEADER => {
let mut cursor = Cursor::new(&decompressed[1..]);
let mut header = self.parse_main_header(&mut cursor)?;
header.header_encrypted = header_encrypted;
Ok(header)
}
property_id::ENCODED_HEADER => {
let mut cursor = Cursor::new(&decompressed[1..]);
let mut nested_streams = ArchiveHeader::default();
self.parse_streams_info(&mut cursor, &mut nested_streams)?;
let nested_encrypted = Self::folder_uses_encryption(&nested_streams);
let nested_decompressed =
self.decompress_header(r, &nested_streams, archive_data_start)?;
if nested_decompressed.is_empty() {
return Err(Error::InvalidFormat(
"empty nested decompressed header".into(),
));
}
if nested_decompressed[0] == property_id::HEADER {
let mut inner_cursor = Cursor::new(&nested_decompressed[1..]);
let mut header = self.parse_main_header(&mut inner_cursor)?;
header.header_encrypted = header_encrypted || nested_encrypted;
Ok(header)
} else {
Err(Error::InvalidFormat(format!(
"unexpected header marker in nested decompressed data: {:#x}",
nested_decompressed[0]
)))
}
}
_ => Err(Error::InvalidFormat(format!(
"unexpected header marker in decompressed data: {:#x}",
first_byte
))),
}
}
/// Returns `true` when any coder of any folder in `header.unpack_info`
/// has the AES method ID; `false` when there is no unpack info at all.
fn folder_uses_encryption(header: &ArchiveHeader) -> bool {
    header
        .unpack_info
        .iter()
        .flat_map(|info| &info.folders)
        .flat_map(|folder| &folder.coders)
        .any(|coder| coder.method_id.as_slice() == codec::method::AES)
}
fn decompress_header<R: Read + Seek>(
&self,
r: &mut R,
header: &ArchiveHeader,
archive_data_start: u64,
) -> Result<Vec<u8>> {
let pack_info = header
.pack_info
.as_ref()
.ok_or_else(|| Error::InvalidFormat("encoded header missing pack info".into()))?;
let unpack_info = header
.unpack_info
.as_ref()
.ok_or_else(|| Error::InvalidFormat("encoded header missing unpack info".into()))?;
if unpack_info.folders.is_empty() {
return Err(Error::InvalidFormat("encoded header has no folders".into()));
}
let num_pack_streams = pack_info.num_streams();
for (folder_idx, folder) in unpack_info.folders.iter().enumerate() {
folder
.validate_packed_streams(num_pack_streams)
.map_err(|e| Error::InvalidFormat(format!("folder[{}]: {}", folder_idx, e)))?;
folder
.validate_bind_pairs()
.map_err(|e| Error::InvalidFormat(format!("folder[{}]: {}", folder_idx, e)))?;
}
let folder = &unpack_info.folders[0];
let pack_pos = archive_data_start + pack_info.pack_pos;
r.seek(SeekFrom::Start(pack_pos))?;
let pack_size = pack_info
.pack_sizes
.first()
.copied()
.ok_or_else(|| Error::InvalidFormat("encoded header missing pack size".into()))?;
let mut packed_data = vec![0u8; pack_size as usize];
r.read_exact(&mut packed_data)?;
let unpack_size = folder
.final_unpack_size()
.ok_or_else(|| Error::InvalidFormat("encoded header missing unpack size".into()))?;
let cursor = Cursor::new(packed_data);
let mut decoder = self.build_header_decoder(cursor, folder, unpack_size)?;
let mut decompressed = Vec::with_capacity(unpack_size as usize);
decoder.read_to_end(&mut decompressed)?;
if let Some(expected_crc) = folder.unpack_crc {
let actual_crc = crc32fast::hash(&decompressed);
if actual_crc != expected_crc {
return Err(Error::CorruptHeader {
offset: pack_pos,
reason: format!(
"encoded header CRC mismatch: expected {:#x}, got {:#x}",
expected_crc, actual_crc
),
});
}
}
Ok(decompressed)
}
fn build_header_decoder(
&self,
input: Cursor<Vec<u8>>,
folder: &Folder,
unpack_size: u64,
) -> Result<Box<dyn Read>> {
if folder.coders.is_empty() {
return Err(Error::InvalidFormat("folder has no coders".into()));
}
if folder.coders.len() == 1 {
let coder = &folder.coders[0];
return Ok(Box::new(codec::build_decoder(input, coder, unpack_size)?));
}
if folder.coders.len() == 2 {
let outer_coder = &folder.coders[0]; let inner_coder = &folder.coders[1];
#[cfg(feature = "aes")]
if inner_coder.method_id.as_slice() == codec::method::AES {
let password = self.password.as_ref().ok_or(Error::PasswordRequired)?;
let aes_unpack_size = folder.unpack_sizes.get(1).copied().unwrap_or(unpack_size);
let decrypted =
codec::build_decoder_encrypted(input, inner_coder, aes_unpack_size, password)?;
return Ok(Box::new(codec::build_decoder(
decrypted,
outer_coder,
unpack_size,
)?));
}
let codec_unpack_size = folder.unpack_sizes.get(1).copied().unwrap_or(unpack_size);
let inner = codec::build_decoder(input, inner_coder, codec_unpack_size)?;
return Ok(Box::new(codec::build_decoder(
inner,
outer_coder,
unpack_size,
)?));
}
Err(Error::UnsupportedFeature {
feature: "encoded headers with more than 2 coders",
})
}
fn get_file_sizes_and_crcs(&self, header: &ArchiveHeader) -> (Vec<u64>, Vec<Option<u32>>) {
if let Some(ref substreams) = header.substreams_info {
(substreams.unpack_sizes.clone(), substreams.digests.clone())
} else if let Some(ref unpack_info) = header.unpack_info {
let sizes: Vec<u64> = unpack_info
.folders
.iter()
.filter_map(|f| f.final_unpack_size())
.collect();
let crcs: Vec<Option<u32>> = unpack_info.folders.iter().map(|f| f.unpack_crc).collect();
(sizes, crcs)
} else {
(Vec::new(), Vec::new())
}
}
/// Fails with `Error::ResourceLimitExceeded` once `bytes_read` is strictly
/// greater than `limits.max_header_bytes`.
fn check_byte_limit(&self) -> Result<()> {
    let within_limit = self.bytes_read <= self.limits.max_header_bytes;
    if within_limit {
        return Ok(());
    }
    Err(Error::ResourceLimitExceeded(
        "header byte limit exceeded".into(),
    ))
}
}
impl Default for HeaderParser {
fn default() -> Self {
Self::new()
}
}
pub fn read_archive_header<R: Read + Seek>(
r: &mut R,
limits: Option<ResourceLimits>,
) -> Result<(StartHeader, ArchiveHeader)> {
read_archive_header_internal(r, limits, 0, None)
}
/// Reads the start header and the archive header from `r`, using an SFX
/// offset of zero and the given optional password.
///
/// `limits` of `None` selects the default resource limits.
#[cfg(feature = "aes")]
pub fn read_archive_header_with_password<R: Read + Seek>(
    r: &mut R,
    limits: Option<ResourceLimits>,
    password: Option<crate::crypto::Password>,
) -> Result<(StartHeader, ArchiveHeader)> {
    // An archive without an SFX stub is the offset-zero case.
    read_archive_header_with_offset_and_password(r, limits, 0, password)
}
/// Reads the start header and the archive header from `r` for an archive
/// with the given `sfx_offset`, without a password.
///
/// `limits` of `None` selects the default resource limits.
pub fn read_archive_header_with_offset<R: Read + Seek>(
    r: &mut R,
    limits: Option<ResourceLimits>,
    sfx_offset: u64,
) -> Result<(StartHeader, ArchiveHeader)> {
    let no_password = None;
    read_archive_header_internal(r, limits, sfx_offset, no_password)
}
/// Reads the start header and the archive header from `r` for an archive
/// with the given `sfx_offset`, using the given optional password.
///
/// `limits` of `None` selects the default resource limits.
#[cfg(feature = "aes")]
pub fn read_archive_header_with_offset_and_password<R: Read + Seek>(
    r: &mut R,
    limits: Option<ResourceLimits>,
    sfx_offset: u64,
    password: Option<crate::crypto::Password>,
) -> Result<(StartHeader, ArchiveHeader)> {
    let (start_header, archive_header) =
        read_archive_header_internal(r, limits, sfx_offset, password)?;
    Ok((start_header, archive_header))
}
#[cfg(feature = "aes")]
fn read_archive_header_internal<R: Read + Seek>(
r: &mut R,
limits: Option<ResourceLimits>,
sfx_offset: u64,
password: Option<crate::crypto::Password>,
) -> Result<(StartHeader, ArchiveHeader)> {
let mut start_header = StartHeader::parse(r)?;
start_header.sfx_offset = sfx_offset;
if start_header.next_header_size == 0 {
return Ok((start_header, ArchiveHeader::default()));
}
let header_pos = start_header.next_header_position();
r.seek(SeekFrom::Start(header_pos))?;
let mut header_data = vec![0u8; start_header.next_header_size as usize];
r.read_exact(&mut header_data)?;
let actual_crc = crc32fast::hash(&header_data);
if actual_crc != start_header.next_header_crc {
return Err(Error::CorruptHeader {
offset: header_pos,
reason: format!(
"next header CRC mismatch: expected {:#x}, got {:#x}",
start_header.next_header_crc, actual_crc
),
});
}
let mut parser = limits
.map(HeaderParser::with_limits)
.unwrap_or_default()
.with_password(password);
if header_data.is_empty() {
return Err(Error::InvalidFormat("empty header data".into()));
}
let first_byte = header_data[0];
let archive_header = match first_byte {
property_id::HEADER => {
let mut cursor = Cursor::new(&header_data[1..]);
parser.parse_main_header(&mut cursor)?
}
property_id::ENCODED_HEADER => {
r.seek(SeekFrom::Start(header_pos))?;
let data_start = sfx_offset + SIGNATURE_HEADER_SIZE;
parser.parse_header_with_seek(r, data_start)?
}
_ => {
return Err(Error::InvalidFormat(format!(
"expected header marker, got {:#x}",
first_byte
)));
}
};
Ok((start_header, archive_header))
}
#[cfg(not(feature = "aes"))]
fn read_archive_header_internal<R: Read + Seek>(
r: &mut R,
limits: Option<ResourceLimits>,
sfx_offset: u64,
_password: Option<()>,
) -> Result<(StartHeader, ArchiveHeader)> {
let mut start_header = StartHeader::parse(r)?;
start_header.sfx_offset = sfx_offset;
if start_header.next_header_size == 0 {
return Ok((start_header, ArchiveHeader::default()));
}
let header_pos = start_header.next_header_position();
r.seek(SeekFrom::Start(header_pos))?;
let mut header_data = vec![0u8; start_header.next_header_size as usize];
r.read_exact(&mut header_data)?;
let actual_crc = crc32fast::hash(&header_data);
if actual_crc != start_header.next_header_crc {
return Err(Error::CorruptHeader {
offset: header_pos,
reason: format!(
"next header CRC mismatch: expected {:#x}, got {:#x}",
start_header.next_header_crc, actual_crc
),
});
}
let mut parser = limits.map(HeaderParser::with_limits).unwrap_or_default();
if header_data.is_empty() {
return Err(Error::InvalidFormat("empty header data".into()));
}
let first_byte = header_data[0];
let archive_header = match first_byte {
property_id::HEADER => {
let mut cursor = Cursor::new(&header_data[1..]);
parser.parse_main_header(&mut cursor)?
}
property_id::ENCODED_HEADER => {
r.seek(SeekFrom::Start(header_pos))?;
let data_start = sfx_offset + SIGNATURE_HEADER_SIZE;
parser.parse_header_with_seek(r, data_start)?
}
_ => {
return Err(Error::InvalidFormat(format!(
"expected header marker, got {:#x}",
first_byte
)));
}
};
Ok((start_header, archive_header))
}
// Unit tests for `HeaderParser` using small hand-built header byte sequences.
#[cfg(test)]
#[allow(clippy::vec_init_then_push)]
mod tests {
use super::*;
use std::io::Cursor;
// Thin wrapper around the reader module's encoder that unwraps the result,
// so test fixtures can be built without error handling noise.
fn write_variable_u64(buf: &mut Vec<u8>, value: u64) {
use super::super::reader::write_variable_u64;
write_variable_u64(buf, value).unwrap();
}
// A header consisting only of HEADER + END parses to an all-`None` result.
#[test]
fn test_header_parser_empty() {
let data = vec![property_id::HEADER, property_id::END];
let mut cursor = Cursor::new(&data);
let mut parser = HeaderParser::new();
let header = parser.parse_header(&mut cursor).unwrap();
assert!(header.pack_info.is_none());
assert!(header.unpack_info.is_none());
assert!(header.files_info.is_none());
}
// An unknown leading marker byte is rejected as an invalid format.
#[test]
fn test_header_parser_invalid_first_byte() {
let data = vec![0x99];
let mut cursor = Cursor::new(&data);
let mut parser = HeaderParser::new();
let err = parser.parse_header(&mut cursor).unwrap_err();
assert!(matches!(err, Error::InvalidFormat(_)));
}
// A header with a streams-info section containing pack info exposes the
// pack size that was written into the fixture.
#[test]
fn test_header_parser_with_streams() {
let mut data = Vec::new();
data.push(property_id::HEADER);
data.push(property_id::MAIN_STREAMS_INFO);
data.push(property_id::PACK_INFO);
// Presumably pack position 0 and a stream count of 1, followed by the SIZE
// property - `PackInfo::parse` is not visible here, confirm against it.
write_variable_u64(&mut data, 0); write_variable_u64(&mut data, 1); data.push(property_id::SIZE);
write_variable_u64(&mut data, 1000); data.push(property_id::END);
data.push(property_id::END);
data.push(property_id::END);
let mut cursor = Cursor::new(&data);
let mut parser = HeaderParser::new();
let header = parser.parse_header(&mut cursor).unwrap();
assert!(header.pack_info.is_some());
let pack_info = header.pack_info.unwrap();
assert_eq!(pack_info.pack_sizes, vec![1000]);
}
// The non-seekable entry point refuses encoded headers with
// `UnsupportedFeature` instead of trying to decode them.
#[test]
fn test_encoded_header_requires_codecs() {
let data = vec![
property_id::ENCODED_HEADER,
property_id::END, ];
let mut cursor = Cursor::new(&data);
let mut parser = HeaderParser::new();
let err = parser.parse_header(&mut cursor).unwrap_err();
assert!(matches!(err, Error::UnsupportedFeature { .. }));
}
// Limits passed to `with_limits` are stored on the parser unchanged.
#[test]
fn test_resource_limits() {
let limits = ResourceLimits {
max_entries: 10,
max_header_bytes: 100,
..Default::default()
};
let parser = HeaderParser::with_limits(limits.clone());
assert_eq!(parser.limits.max_entries, 10);
assert_eq!(parser.limits.max_header_bytes, 100);
}
}