use crate::{CompressedBlock, ZiftError};
/// Upper bound on a single framing chunk's declared length (64 KiB).
/// NOTE(review): the Snappy framing spec allows uncompressed chunks of up to
/// 65_536 data bytes *plus* the 4-byte CRC (65_540 bytes total), so this
/// limit rejects maximum-size chunks from conforming encoders — confirm
/// whether that is intentional.
const MAX_CHUNK_SIZE: usize = 64 * 1024;
/// Cap on the number of chunks processed per stream (denial-of-service guard).
const MAX_CHUNKS_PER_STREAM: usize = 100_000;
/// Cap on total literal bytes accumulated across the whole stream (256 MiB).
const MAX_TOTAL_LITERALS: usize = 256 * 1024 * 1024;
/// Maximum allowed output:input size ratio (zip-bomb guard).
const MAX_DECOMPRESSION_RATIO: usize = 250;
/// Extracts literal bytes from a Snappy framing-format stream.
///
/// Walks the chunk stream (optionally skipping the leading stream-identifier
/// chunk) and accumulates the payload of every uncompressed data chunk
/// (type `0x01`, minus its 4-byte CRC prefix) into `CompressedBlock`s of
/// roughly 32 KiB each. Compressed chunks (type `0x00`) are rejected; every
/// other chunk type is skipped.
///
/// # Errors
///
/// * `ZiftError::InvalidData` — compressed chunk encountered, truncated
///   framing, chunk-count limit exceeded, or decompression-ratio limit
///   exceeded.
/// * `ZiftError::BlockTooLarge` — a single chunk exceeds `MAX_CHUNK_SIZE`,
///   or accumulated literals exceed `MAX_TOTAL_LITERALS`.
pub fn extract_literals(data: &[u8]) -> Result<Vec<CompressedBlock>, ZiftError> {
    // Full stream-identifier chunk: type 0xff, 3-byte LE length (6),
    // then the magic payload "sNaPpY".
    const STREAM_IDENTIFIER: [u8; 10] =
        [0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59];
    // Flush accumulated literals into a block once they pass this size.
    const FLUSH_THRESHOLD: usize = 32 * 1024;

    let mut blocks = Vec::new();
    let mut pos = 0usize;
    let mut chunk_count = 0usize;
    let mut total_literals = 0usize;

    if data.starts_with(&STREAM_IDENTIFIER) {
        pos = STREAM_IDENTIFIER.len();
    }

    let mut current_literals = Vec::with_capacity(MAX_CHUNK_SIZE);
    let mut block_start = pos;

    // Zip-bomb guard: cap output at MAX_DECOMPRESSION_RATIO times the input
    // size, with a 1 MiB floor for tiny inputs. Loop-invariant, so computed
    // once up front (the original recomputed it on every iteration).
    let max_allowed_literals = data
        .len()
        .saturating_mul(MAX_DECOMPRESSION_RATIO)
        .max(1024 * 1024);

    while pos < data.len() {
        chunk_count += 1;
        // `>` (not `>=`) so exactly MAX_CHUNKS_PER_STREAM chunks are
        // accepted, matching the "max N" wording of the error message.
        if chunk_count > MAX_CHUNKS_PER_STREAM {
            return Err(ZiftError::InvalidData {
                offset: pos,
                reason: format!("too many Snappy chunks (max {MAX_CHUNKS_PER_STREAM})"),
            });
        }
        if total_literals + current_literals.len() > MAX_TOTAL_LITERALS {
            return Err(ZiftError::BlockTooLarge {
                size: total_literals + current_literals.len(),
                max: MAX_TOTAL_LITERALS,
            });
        }
        if total_literals > max_allowed_literals {
            return Err(ZiftError::InvalidData {
                offset: pos,
                reason: format!("decompression ratio exceeded limit of {MAX_DECOMPRESSION_RATIO}"),
            });
        }

        let chunk_type = data[pos];
        pos += 1;
        let (chunk_len, tag_len) = decode_chunk_len(data, pos)?;
        pos += tag_len;

        if chunk_len > MAX_CHUNK_SIZE {
            return Err(ZiftError::BlockTooLarge {
                size: chunk_len,
                max: MAX_CHUNK_SIZE,
            });
        }
        if pos + chunk_len > data.len() {
            return Err(ZiftError::InvalidData {
                offset: pos,
                reason: "chunk exceeds data bounds".to_string(),
            });
        }
        let chunk_data = &data[pos..pos + chunk_len];

        match chunk_type {
            0x00 => {
                return Err(ZiftError::InvalidData {
                    offset: pos,
                    reason: "compressed snappy blocks are not supported, only uncompressed blocks are supported".to_string(),
                });
            }
            0x01 => {
                // Uncompressed data chunk: 4-byte CRC, then the literal bytes.
                if chunk_data.len() > 4 {
                    current_literals.extend_from_slice(&chunk_data[4..]);
                }
            }
            _ => {
                // Stream identifier / padding / skippable chunks: ignored.
            }
        }

        // Advance past the payload BEFORE deciding to flush, so that the
        // block's recorded span [block_start, pos) covers the chunk whose
        // literals it contains. (The original flushed first, so each block's
        // final chunk was misattributed to the following block's span.)
        pos += chunk_len;
        if current_literals.len() > FLUSH_THRESHOLD {
            total_literals = flush_block(
                &mut blocks,
                &mut current_literals,
                block_start,
                pos,
                total_literals,
            )?;
            block_start = pos;
        }
    }

    // Flush whatever literals remain after the final chunk.
    if !current_literals.is_empty() {
        flush_block(
            &mut blocks,
            &mut current_literals,
            block_start,
            pos,
            total_literals,
        )?;
    }
    Ok(blocks)
}
/// Moves the accumulated `literals` into a new `CompressedBlock` covering the
/// input span `[block_start, pos)` and appends it to `blocks`.
///
/// On success, returns the updated running literal total and leaves
/// `literals` empty (its buffer is moved, not copied), ready for reuse.
///
/// # Errors
///
/// `ZiftError::BlockTooLarge` when the running total would exceed
/// `MAX_TOTAL_LITERALS`; in that case nothing is pushed and `literals` is
/// left untouched.
fn flush_block(
    blocks: &mut Vec<CompressedBlock>,
    literals: &mut Vec<u8>,
    block_start: usize,
    pos: usize,
    total_literals: usize,
) -> Result<usize, ZiftError> {
    let running_total = total_literals + literals.len();
    if running_total > MAX_TOTAL_LITERALS {
        return Err(ZiftError::BlockTooLarge {
            size: running_total,
            max: MAX_TOTAL_LITERALS,
        });
    }

    // Narrowing conversions saturate rather than fail on (theoretical)
    // overflow of the block offset / span.
    let offset = u64::try_from(block_start).unwrap_or(u64::MAX);
    let span = u32::try_from(pos - block_start).unwrap_or(u32::MAX);

    let mut block = CompressedBlock::new(offset, span);
    block.uncompressed_len = Some(u32::try_from(literals.len()).unwrap_or(u32::MAX));
    // Move the literal buffer out wholesale, leaving an empty Vec behind.
    block.literals = std::mem::take(literals);
    blocks.push(block);

    Ok(running_total)
}
/// Decodes the 3-byte little-endian length field that follows a Snappy
/// framing chunk-type byte.
///
/// Returns `(length, 3)` — the decoded length and the number of bytes
/// consumed from `data`.
///
/// # Errors
///
/// `ZiftError::InvalidData` if fewer than 3 bytes remain at `start`.
fn decode_chunk_len(data: &[u8], start: usize) -> Result<(usize, usize), ZiftError> {
    let bytes = match data.get(start..start + 3) {
        Some(b) => b,
        None => {
            return Err(ZiftError::InvalidData {
                offset: start,
                reason: "truncated chunk length — need 3 bytes for Snappy framing length".to_string(),
            });
        }
    };
    let len = (bytes[0] as usize) | ((bytes[1] as usize) << 8) | ((bytes[2] as usize) << 16);
    Ok((len, 3))
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    // An empty input contains no chunks, so no blocks are produced.
    #[test]
    fn test_empty_data() {
        let data = [];
        let blocks = extract_literals(&data).unwrap();
        assert!(blocks.is_empty());
    }

    // A compressed chunk (type 0x00) following the stream identifier must be
    // rejected: only uncompressed chunks are supported.
    #[test]
    fn test_rejects_compressed_chunk() {
        let data = [
            // Stream identifier: type 0xff, LE length 6, payload "sNaPpY".
            0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59,
            // Compressed chunk: type 0x00, LE length 5, then 5 payload bytes.
            0x00, 0x05, 0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x00, ];
        let result = extract_literals(&data);
        assert!(matches!(result, Err(ZiftError::InvalidData { .. })));
    }
}