use super::decoder::extract_literals_from_block;
use super::frame::{parse_frame_header, BlockType, MAX_BLOCK_SIZE};
use crate::{CompressedBlock, ZiftError};
pub(crate) const MAX_TOTAL_LITERALS: usize = 256 * 1024 * 1024;
const MAX_DECOMPRESSION_RATIO: usize = 250;
/// Walks every block in `data` after the frame header, collecting the
/// literal bytes of each block into a `CompressedBlock`.
///
/// # Errors
///
/// Returns `ZiftError` when the frame or block structure is malformed,
/// when the total literal output exceeds the absolute cap
/// (`MAX_TOTAL_LITERALS`) or the ratio-based decompression-bomb limit,
/// or when the block count exceeds 100K.
pub fn extract_literals(data: &[u8]) -> Result<Vec<CompressedBlock>, ZiftError> {
    let mut blocks = Vec::new();
    let mut pos = 0usize;
    let mut total_literals = 0usize;
    parse_frame_header(data, &mut pos)?;
    // Decompression-bomb guard: output may not exceed RATIO x input size,
    // with a 1 MiB floor so tiny inputs are not rejected spuriously.
    // This depends only on `data.len()`, so compute it once before the loop.
    let max_allowed_literals = data
        .len()
        .saturating_mul(MAX_DECOMPRESSION_RATIO)
        .max(1024 * 1024);
    loop {
        let (block, is_last) = parse_block(data, &mut pos)?;
        // saturating_add: a hostile stream cannot wrap the running total.
        total_literals = total_literals.saturating_add(block.literals().len());
        if total_literals > MAX_TOTAL_LITERALS {
            return Err(ZiftError::BlockTooLarge {
                size: total_literals,
                max: MAX_TOTAL_LITERALS,
            });
        }
        if total_literals > max_allowed_literals {
            return Err(ZiftError::InvalidData {
                offset: pos,
                reason: format!("decompression ratio exceeded limit of {MAX_DECOMPRESSION_RATIO}"),
            });
        }
        blocks.push(block);
        if is_last {
            break;
        }
        // Cap the block count to bound memory and time on malformed input.
        if blocks.len() >= 100_000 {
            return Err(ZiftError::InvalidData {
                offset: pos,
                reason: "too many blocks (>100K), likely malformed".to_string(),
            });
        }
    }
    Ok(blocks)
}
/// Parses a single block starting at `*pos`, advancing `*pos` past it.
///
/// Block layout: a 3-byte header (bit 0 = last-block flag, bits 1-2 =
/// block type, bits 3-23 = block size) followed by the block payload
/// (1 byte for RLE, `block_size` bytes otherwise).
///
/// Returns the parsed block and whether it carried the last-block flag.
///
/// # Errors
///
/// Returns `ZiftError::InvalidData` on truncation, an invalid or reserved
/// block type, or a payload that runs past the end of `data`, and
/// `ZiftError::BlockTooLarge` when the declared size exceeds `MAX_BLOCK_SIZE`.
pub(crate) fn parse_block(
    data: &[u8],
    pos: &mut usize,
) -> Result<(CompressedBlock, bool), ZiftError> {
    // saturating_sub instead of `*pos + 3 > data.len()`: equivalent for all
    // valid positions, but cannot wrap if `pos` were ever corrupted.
    if data.len().saturating_sub(*pos) < 3 {
        return Err(ZiftError::InvalidData {
            offset: *pos,
            reason: "truncated block header".to_string(),
        });
    }
    let header_offset = *pos;
    let b0 = data[*pos];
    let b1 = data[*pos + 1];
    let b2 = data[*pos + 2];
    *pos += 3;
    let last_block = (b0 & 0x01) != 0;
    // ok_or_else: only allocate the error String on the failure path.
    let block_type =
        BlockType::from_u8((b0 >> 1) & 0x03).ok_or_else(|| ZiftError::InvalidData {
            offset: header_offset,
            reason: "invalid block type".to_string(),
        })?;
    // 21-bit size packed across the three header bytes (little-endian-ish).
    let block_size = ((b0 >> 3) as usize) | ((b1 as usize) << 5) | ((b2 as usize) << 13);
    if block_size > MAX_BLOCK_SIZE {
        return Err(ZiftError::BlockTooLarge {
            size: block_size,
            max: MAX_BLOCK_SIZE,
        });
    }
    // RLE stores a single byte to be repeated; all other types store the
    // payload verbatim at `block_size` bytes.
    let stream_data_size = match block_type {
        BlockType::Rle => 1usize,
        _ => block_size,
    };
    // Overflow-safe form of `*pos + stream_data_size > data.len()`.
    if data.len().saturating_sub(*pos) < stream_data_size {
        return Err(ZiftError::InvalidData {
            offset: *pos,
            reason: format!(
                "block data size {stream_data_size} exceeds remaining data {}",
                data.len() - *pos
            ),
        });
    }
    let block_data = &data[*pos..*pos + stream_data_size];
    // Record where this block lives in the compressed stream, header included.
    // try_from clamps rather than truncates on (practically impossible) overflow.
    let compressed_offset = u64::try_from(header_offset).unwrap_or(u64::MAX);
    let compressed_len = u32::try_from(stream_data_size + 3).unwrap_or(u32::MAX);
    let mut block = CompressedBlock::new(compressed_offset, compressed_len);
    match block_type {
        BlockType::Raw => {
            // Raw: literals are the payload bytes as-is.
            block.literals = block_data.to_vec();
            block.uncompressed_len = Some(u32::try_from(block_size).unwrap_or(u32::MAX));
        }
        BlockType::Rle => {
            // RLE: one byte repeated `block_size` times. The bounds check
            // above guarantees the byte is present, so the guard is purely
            // defensive and should always take the true branch.
            if !block_data.is_empty() {
                block.literals = vec![block_data[0]; block_size];
                block.uncompressed_len = Some(u32::try_from(block_size).unwrap_or(u32::MAX));
            }
        }
        BlockType::Compressed => {
            // Delegate entropy-coded payloads to the block decoder.
            block.literals = extract_literals_from_block(block_data)?;
            block.uncompressed_len = Some(u32::try_from(block_size).unwrap_or(u32::MAX));
        }
        BlockType::Reserved => {
            return Err(ZiftError::InvalidData {
                offset: header_offset,
                reason: "reserved block type".to_string(),
            });
        }
    }
    *pos += stream_data_size;
    Ok((block, last_block))
}