use super::{reader::CramError, varint};
/// Kind of payload carried by a CRAM block, as identified by the content-type byte in the
/// block header.
///
/// The byte-to-variant mapping lives in `ContentType::from_byte`; byte values without a
/// variant here are rejected there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentType {
/// Content-type byte `0`.
FileHeader,
/// Content-type byte `1`.
CompressionHeader,
/// Content-type byte `2`.
SliceHeader,
/// Content-type byte `4`.
ExternalData,
/// Content-type byte `5`.
CoreData,
}
impl ContentType {
    /// Maps a raw content-type byte from a block header to its [`ContentType`] variant.
    ///
    /// # Errors
    ///
    /// Returns [`CramError::UnknownBlockContentType`] carrying the offending byte for any
    /// value other than `0`, `1`, `2`, `4` or `5`.
    fn from_byte(b: u8) -> Result<Self, CramError> {
        let kind = match b {
            0 => Self::FileHeader,
            1 => Self::CompressionHeader,
            2 => Self::SliceHeader,
            4 => Self::ExternalData,
            5 => Self::CoreData,
            unknown => {
                return Err(CramError::UnknownBlockContentType { content_type: unknown });
            }
        };
        Ok(kind)
    }
}
/// A single parsed CRAM block: its header fields plus the decompressed payload.
#[derive(Debug)]
pub struct Block {
/// Payload kind decoded from the block's content-type byte.
pub content_type: ContentType,
/// Content id read from the block header as ITF8 and reinterpreted as a signed value.
pub content_id: i32,
/// Payload bytes after decompression with the block's declared method.
pub data: Vec<u8>,
}
/// Parses one block from the start of `buf`.
///
/// On success returns the decoded [`Block`] together with the number of bytes of `buf` the
/// block occupied (header, compressed payload and trailing CRC32), so a caller can continue
/// parsing at `&buf[consumed..]`.
///
/// No caller-supplied codec buffers are used: both optional buffers are passed as `None`.
///
/// # Errors
///
/// Propagates every error produced by `parse_block_inner`.
pub fn parse_block(buf: &[u8]) -> Result<(Block, usize), CramError> {
parse_block_inner(buf, None, None)
}
/// Parses one block from the start of `buf`, forwarding optional codec buffers.
///
/// Behaves like [`parse_block`], except that `rans_4x8_buf` (used when the block's
/// compression method is `4`) and `nx16_order1_buf` (used for method `5`) are handed to the
/// `decode_with_buf` variants of the respective decoders. Passing `None` for either selects
/// the buffer-less decoder instead.
///
/// # Errors
///
/// Propagates every error produced by `parse_block_inner`.
pub(crate) fn parse_block_with_buf(
buf: &[u8],
rans_4x8_buf: Option<&mut super::rans::Rans4x8Buf>,
nx16_order1_buf: Option<&mut super::rans_nx16::Nx16Order1Buf>,
) -> Result<(Block, usize), CramError> {
parse_block_inner(buf, rans_4x8_buf, nx16_order1_buf)
}
fn parse_block_inner(
buf: &[u8],
rans_4x8_buf: Option<&mut super::rans::Rans4x8Buf>,
nx16_order1_buf: Option<&mut super::rans_nx16::Nx16Order1Buf>,
) -> Result<(Block, usize), CramError> {
let mut pos = 0;
let &method = buf.get(pos).ok_or(CramError::Truncated { context: "block method" })?;
pos = pos.checked_add(1).ok_or(CramError::Truncated { context: "block method pos" })?;
let &content_type_byte =
buf.get(pos).ok_or(CramError::Truncated { context: "block content type" })?;
let content_type = ContentType::from_byte(content_type_byte)?;
pos = pos.checked_add(1).ok_or(CramError::Truncated { context: "block content type pos" })?;
let (content_id_u32, n) = varint::decode_itf8(
buf.get(pos..).ok_or(CramError::Truncated { context: "block content id" })?,
)
.ok_or(CramError::Truncated { context: "block content id" })?;
let content_id = content_id_u32.cast_signed();
pos = pos.checked_add(n).ok_or(CramError::Truncated { context: "block content id pos" })?;
let (compressed_size, n) = varint::decode_itf8(
buf.get(pos..).ok_or(CramError::Truncated { context: "block compressed size" })?,
)
.ok_or(CramError::Truncated { context: "block compressed size" })?;
pos =
pos.checked_add(n).ok_or(CramError::Truncated { context: "block compressed size pos" })?;
let (uncompressed_size, n) = varint::decode_itf8(
buf.get(pos..).ok_or(CramError::Truncated { context: "block uncompressed size" })?,
)
.ok_or(CramError::Truncated { context: "block uncompressed size" })?;
pos = pos
.checked_add(n)
.ok_or(CramError::Truncated { context: "block uncompressed size pos" })?;
let crc_start = 0;
let compressed_end = pos
.checked_add(compressed_size as usize)
.ok_or(CramError::Truncated { context: "block compressed end overflow" })?;
let compressed_data =
buf.get(pos..compressed_end).ok_or(CramError::Truncated { context: "block data" })?;
pos = compressed_end;
let crc_end =
pos.checked_add(4).ok_or(CramError::Truncated { context: "block CRC32 pos overflow" })?;
let crc_bytes = buf.get(pos..crc_end).ok_or(CramError::Truncated { context: "block CRC32" })?;
let expected_crc = u32::from_le_bytes(
crc_bytes.try_into().map_err(|_| CramError::Truncated { context: "block CRC32" })?,
);
let crc_data = buf
.get(crc_start..compressed_end)
.ok_or(CramError::Truncated { context: "block CRC32 range" })?;
let mut crc = libdeflater::Crc::new();
crc.update(crc_data);
if crc.sum() != expected_crc {
return Err(CramError::ChecksumMismatch {
context: "block",
expected: expected_crc,
found: crc.sum(),
});
}
pos = pos.checked_add(4).ok_or(CramError::Truncated { context: "block pos after CRC" })?;
let uncompressed_size = uncompressed_size as usize;
super::reader::check_alloc_size(uncompressed_size, "block uncompressed size")?;
let data = decompress_block(
method,
compressed_data,
uncompressed_size,
content_type_byte,
content_id,
rans_4x8_buf,
nx16_order1_buf,
)?;
Ok((Block { content_type, content_id, data }, pos))
}
/// Decompresses a block payload according to the block's compression `method` byte.
///
/// | `method` | handling                                                        |
/// |----------|-----------------------------------------------------------------|
/// | `0`      | raw: `compressed` is copied verbatim                            |
/// | `1`      | gzip via `libdeflater`                                          |
/// | `2`      | bzip2 via `bzip2::read::BzDecoder`                              |
/// | `3`      | LZMA via `xz2::read::XzDecoder`                                 |
/// | `4`      | `super::rans` (uses `rans_4x8_buf` when provided)               |
/// | `5`      | `super::rans_nx16` (uses `nx16_order1_buf` when provided)       |
/// | `8`      | `super::tok3`                                                   |
///
/// `uncompressed_size` is the size declared in the block header. It sizes the gzip output
/// buffer, caps the bzip2/LZMA output and is forwarded to the `rans_nx16` decoder; methods
/// `0`, `4` and `8` do not consult it here.
///
/// `content_type` and `content_id` are used only to describe the block in
/// [`CramError::UnsupportedCodec`].
///
/// # Errors
///
/// * [`CramError::GzipDecompressionFailed`] when `libdeflater` rejects the stream (including
///   when the stream would produce more than `uncompressed_size` bytes).
/// * [`CramError::Bzip2DecompressionFailed`] / [`CramError::LzmaDecompressionFailed`] when the
///   decoder fails or the decoded data exceeds `uncompressed_size`.
/// * Errors returned by the `rans`, `rans_nx16` and `tok3` decoders.
/// * [`CramError::UnsupportedCodec`] for any other `method`.
fn decompress_block(
    method: u8,
    compressed: &[u8],
    uncompressed_size: usize,
    content_type: u8,
    content_id: i32,
    rans_4x8_buf: Option<&mut super::rans::Rans4x8Buf>,
    nx16_order1_buf: Option<&mut super::rans_nx16::Nx16Order1Buf>,
) -> Result<Vec<u8>, CramError> {
    match method {
        0 => Ok(compressed.to_vec()),
        1 => {
            let mut output = vec![0u8; uncompressed_size];
            let written = libdeflater::Decompressor::new()
                .gzip_decompress(compressed, &mut output)
                .map_err(|source| CramError::GzipDecompressionFailed { source })?;
            // The stream may legitimately decode to fewer bytes than the buffer holds; keep
            // only what was actually written instead of returning zero padding as data.
            output.truncate(written);
            Ok(output)
        }
        2 => read_decoded_capped(bzip2::read::BzDecoder::new(compressed), uncompressed_size)
            .map_err(|source| CramError::Bzip2DecompressionFailed { source }),
        3 => read_decoded_capped(xz2::read::XzDecoder::new(compressed), uncompressed_size)
            .map_err(|source| CramError::LzmaDecompressionFailed { source }),
        4 => match rans_4x8_buf {
            Some(buf) => super::rans::decode_with_buf(compressed, buf),
            None => super::rans::decode(compressed),
        },
        5 => match nx16_order1_buf {
            Some(buf) => super::rans_nx16::decode_with_buf(compressed, uncompressed_size, buf),
            None => super::rans_nx16::decode(compressed, uncompressed_size),
        },
        8 => super::tok3::decode(compressed),
        _ => Err(CramError::UnsupportedCodec { method, content_type, content_id }),
    }
}

/// Drains a streaming `decoder` into a new buffer, refusing to yield more than
/// `uncompressed_size` bytes.
///
/// The read is capped at one byte past the declared size: that is enough to detect an
/// oversized stream without letting a small compressed payload expand without bound.
fn read_decoded_capped<R: std::io::Read>(
    decoder: R,
    uncompressed_size: usize,
) -> std::io::Result<Vec<u8>> {
    use std::io::Read;

    let cap = u64::try_from(uncompressed_size).unwrap_or(u64::MAX).saturating_add(1);
    let mut output = Vec::with_capacity(uncompressed_size);
    decoder.take(cap).read_to_end(&mut output)?;
    if output.len() > uncompressed_size {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "decompressed data exceeds the declared uncompressed size",
        ));
    }
    Ok(output)
}
/// Test helper: serialises `data` as an uncompressed (method `0`) block.
///
/// Layout written: method byte `0`, `content_type`, ITF8 `content_id`, the payload length as
/// ITF8 twice (compressed and uncompressed size), the payload, and finally the little-endian
/// CRC32 of everything written before it.
#[cfg(test)]
#[allow(clippy::cast_possible_truncation, reason = "test helper with known small values")]
pub fn build_test_block(content_type: u8, content_id: i32, data: &[u8]) -> Vec<u8> {
    let payload_len = data.len() as u32;

    let mut framed = vec![0, content_type];
    encode_itf8_to(&mut framed, content_id as u32);
    // Raw blocks carry the same value for compressed and uncompressed size.
    encode_itf8_to(&mut framed, payload_len);
    encode_itf8_to(&mut framed, payload_len);
    framed.extend_from_slice(data);

    let mut hasher = libdeflater::Crc::new();
    hasher.update(&framed);
    let checksum = hasher.sum();
    framed.extend_from_slice(&checksum.to_le_bytes());
    framed
}
/// Test helper: appends the ITF8 encoding of `val` to `buf`.
#[cfg(test)]
fn encode_itf8_to(buf: &mut Vec<u8>, val: u32) {
    // Five bytes is the size of the scratch array handed to `encode_itf8`.
    let mut scratch = [0u8; 5];
    let written = super::varint::encode_itf8(val, &mut scratch);
    #[allow(
        clippy::indexing_slicing,
        reason = "written is the return value of encode_itf8, always ≤ 5"
    )]
    let encoded = &scratch[..written];
    buf.extend_from_slice(encoded);
}
#[cfg(test)]
#[allow(clippy::cast_possible_truncation, reason = "test code with known small values")]
mod tests {
    use super::*;

    /// Assembles a complete block image: method byte, content-type byte, ITF8 content id,
    /// ITF8 compressed size (the payload length), ITF8 declared uncompressed size, the
    /// payload, and a little-endian CRC32 over everything preceding it.
    fn frame_block(
        method: u8,
        content_type: u8,
        content_id: u32,
        payload: &[u8],
        declared_len: u32,
    ) -> Vec<u8> {
        let mut framed = vec![method, content_type];
        encode_itf8_to(&mut framed, content_id);
        encode_itf8_to(&mut framed, payload.len() as u32);
        encode_itf8_to(&mut framed, declared_len);
        framed.extend_from_slice(payload);
        let mut hasher = libdeflater::Crc::new();
        hasher.update(&framed);
        framed.extend_from_slice(&hasher.sum().to_le_bytes());
        framed
    }

    /// Gzip-compresses `input` at the given libdeflate compression level.
    fn gzip_compress(input: &[u8], level: i32) -> Vec<u8> {
        let mut compressor =
            libdeflater::Compressor::new(libdeflater::CompressionLvl::new(level).unwrap());
        let mut out = vec![0u8; compressor.gzip_compress_bound(input.len())];
        let written = compressor.gzip_compress(input, &mut out).unwrap();
        out.truncate(written);
        out
    }

    #[test]
    fn parse_raw_block() {
        let payload = b"hello world";
        let framed = build_test_block(4, 0, payload);

        let (block, consumed) = parse_block(&framed).unwrap();

        assert_eq!(block.content_type, ContentType::ExternalData);
        assert_eq!(block.content_id, 0);
        assert_eq!(block.data, payload);
        assert_eq!(consumed, framed.len());
    }

    #[test]
    fn parse_raw_block_compression_header() {
        let framed = build_test_block(1, 0, b"\x00\x00\x00");
        let (block, _) = parse_block(&framed).unwrap();
        assert_eq!(block.content_type, ContentType::CompressionHeader);
    }

    #[test]
    fn parse_raw_block_core_data() {
        let payload = b"\xFF\x00\xAA";
        let framed = build_test_block(5, 0, payload);
        let (block, _) = parse_block(&framed).unwrap();
        assert_eq!(block.content_type, ContentType::CoreData);
        assert_eq!(block.data, payload);
    }

    #[test]
    fn parse_gzip_block() {
        let original = b"hello";
        let compressed = gzip_compress(original, 6);
        let framed = frame_block(1, 4, 7, &compressed, original.len() as u32);

        let (block, consumed) = parse_block(&framed).unwrap();

        assert_eq!(block.content_type, ContentType::ExternalData);
        assert_eq!(block.content_id, 7);
        assert_eq!(block.data, original);
        assert_eq!(consumed, framed.len());
    }

    #[test]
    fn parse_bzip2_block() {
        use bzip2::write::BzEncoder;
        use std::io::Write;

        let original = b"hello bzip2 world, this is a test of bzip2 compression";
        let mut encoder = BzEncoder::new(Vec::new(), bzip2::Compression::default());
        encoder.write_all(original).unwrap();
        let compressed = encoder.finish().unwrap();

        let framed = frame_block(2, 4, 0, &compressed, original.len() as u32);
        let (block, _) = parse_block(&framed).unwrap();
        assert_eq!(block.data, original);
    }

    #[test]
    fn parse_lzma_block() {
        use std::io::Write;

        let original = b"hello lzma world, this is a test of lzma compression";
        let mut encoder = xz2::write::XzEncoder::new(Vec::new(), 6);
        encoder.write_all(original).unwrap();
        let compressed = encoder.finish().unwrap();

        let framed = frame_block(3, 4, 0, &compressed, original.len() as u32);
        let (block, _) = parse_block(&framed).unwrap();
        assert_eq!(block.data, original);
    }

    #[test]
    fn crc32_mismatch_detected() {
        let mut framed = build_test_block(4, 0, b"test");
        // Corrupt the final byte, which belongs to the stored CRC32.
        *framed.last_mut().unwrap() ^= 0xFF;

        let err = parse_block(&framed).unwrap_err();
        assert!(matches!(err, CramError::ChecksumMismatch { .. }));
    }

    #[test]
    fn unsupported_codec_detected() {
        // Method 6 with an empty payload and zero declared sizes.
        let framed = frame_block(6, 4, 0, &[], 0);

        match parse_block(&framed).unwrap_err() {
            CramError::UnsupportedCodec { method, .. } => assert_eq!(method, 6),
            other => panic!("expected UnsupportedCodec, got {other:?}"),
        }
    }

    #[test]
    fn truncated_block() {
        assert!(parse_block(&[]).is_err());
        assert!(parse_block(&[0]).is_err());
        assert!(parse_block(&[0, 4]).is_err());
    }

    #[test]
    fn multiple_blocks_sequential() {
        let first = build_test_block(4, 1, b"first");
        let second = build_test_block(4, 2, b"second");
        let combined = [first, second].concat();

        let (b1, used1) = parse_block(&combined).unwrap();
        assert_eq!(b1.content_id, 1);
        assert_eq!(b1.data, b"first");

        // The consumed count of the first block is the offset of the second one.
        let rest = combined.get(used1..).unwrap();
        let (b2, used2) = parse_block(rest).unwrap();
        assert_eq!(b2.content_id, 2);
        assert_eq!(b2.data, b"second");

        assert_eq!(used1 + used2, combined.len());
    }

    #[test]
    fn unknown_content_type_returns_error() {
        let err = ContentType::from_byte(255).unwrap_err();
        assert!(matches!(err, CramError::UnknownBlockContentType { content_type: 255 }));
    }

    #[test]
    fn unknown_content_type_invalid_values() {
        for invalid in [3u8, 6, 100, 200] {
            let err = ContentType::from_byte(invalid).unwrap_err();
            assert!(
                matches!(err, CramError::UnknownBlockContentType { content_type: b } if b == invalid),
                "expected UnknownBlockContentType for {invalid}"
            );
        }
    }

    #[test]
    fn gzip_decompression_failed_on_garbage() {
        let garbage = [0xFFu8; 10];
        let framed = frame_block(1, 4, 0, &garbage, 100);
        let err = parse_block(&framed).unwrap_err();
        assert!(matches!(err, CramError::GzipDecompressionFailed { .. }));
    }

    #[test]
    fn bzip2_decompression_failed_on_garbage() {
        let garbage = [0xFFu8; 10];
        let framed = frame_block(2, 4, 0, &garbage, 100);
        let err = parse_block(&framed).unwrap_err();
        assert!(matches!(err, CramError::Bzip2DecompressionFailed { .. }));
    }

    #[test]
    fn lzma_decompression_failed_on_garbage() {
        let garbage = [0xFFu8; 10];
        let framed = frame_block(3, 4, 0, &garbage, 100);
        let err = parse_block(&framed).unwrap_err();
        assert!(matches!(err, CramError::LzmaDecompressionFailed { .. }));
    }

    #[test]
    fn expected_compression_header_wrong_content_type() {
        let framed = build_test_block(4, 0, b"\x00\x01\x02");
        let (block, _) = parse_block(&framed).unwrap();
        assert_eq!(block.content_type, ContentType::ExternalData);

        let err = CramError::ExpectedCompressionHeader { found: block.content_type };
        assert!(matches!(
            err,
            CramError::ExpectedCompressionHeader { found: ContentType::ExternalData }
        ));
    }

    #[test]
    fn expected_file_header_wrong_content_type() {
        let framed = build_test_block(5, 0, b"\x00\x01\x02");
        let (block, _) = parse_block(&framed).unwrap();
        assert_eq!(block.content_type, ContentType::CoreData);

        let err = CramError::ExpectedFileHeader { found: block.content_type };
        assert!(matches!(err, CramError::ExpectedFileHeader { found: ContentType::CoreData }));
    }

    #[test]
    fn raw_block_from_known_bytes() {
        // Case 1: every header field fits in a single ITF8 byte.
        let payload = b"hello";
        let framed = frame_block(0, 4, 7, payload, payload.len() as u32);

        assert_eq!(framed.first(), Some(&0u8), "method byte must be 0 (raw)");
        assert_eq!(framed.get(1), Some(&4u8), "content_type byte must be 4 (ExternalData)");
        assert_eq!(framed.get(2), Some(&7u8), "content_id ITF8: single byte for val < 0x80");
        assert_eq!(framed.get(3), Some(&5u8), "compressed_size ITF8: single byte for val < 0x80");
        assert_eq!(
            framed.get(4),
            Some(&5u8),
            "uncompressed_size ITF8: single byte for val < 0x80"
        );

        let (block, consumed) = parse_block(&framed).unwrap();
        assert_eq!(block.content_type, ContentType::ExternalData);
        assert_eq!(block.content_id, 7);
        assert_eq!(block.data, payload);
        assert_eq!(consumed, framed.len());

        // Case 2: a content id of 128 needs a two-byte ITF8 encoding.
        let wide_id = frame_block(0, 4, 128, b"abc", 3);

        assert_eq!(wide_id.get(2), Some(&0x80u8), "first ITF8 byte for 128: 0x80");
        assert_eq!(wide_id.get(3), Some(&0x80u8), "second ITF8 byte for 128: 0x80");

        let (block2, consumed2) = parse_block(&wide_id).unwrap();
        assert_eq!(block2.content_id, 128);
        assert_eq!(block2.data, b"abc");
        assert_eq!(consumed2, wide_id.len());
    }

    proptest::proptest! {
        #[test]
        fn gzip_block_roundtrip(data in proptest::collection::vec(proptest::prelude::any::<u8>(), 1..256)) {
            let compressed = gzip_compress(&data, 1);
            let framed = frame_block(1, 4, 0, &compressed, data.len() as u32);
            let (block, _) = parse_block(&framed).unwrap();
            proptest::prop_assert_eq!(&block.data, &data);
        }
    }

    #[test]
    fn parse_block_from_real_cram() {
        let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../tests/data/test.cram");
        let data = std::fs::read(path).unwrap();

        // The four bytes following the 26-byte file definition hold the little-endian
        // length of the first container.
        #[allow(
            clippy::indexing_slicing,
            reason = "CRAM file definition is always 26 bytes per spec"
        )]
        let length_field: [u8; 4] = data[26..30].try_into().unwrap();
        let container_length = i32::from_le_bytes(length_field);

        assert!(container_length > 0, "header container should have positive length");
    }
}