pub(crate) mod binary_index;
pub(crate) mod decoder;
mod encoder;
pub mod hash_index;
pub(crate) mod header;
mod identity;
pub(crate) mod kv_checksum;
mod offset;
mod trailer;
mod transform;
mod r#type;
pub(crate) use decoder::{Decodable, Decoder, DecoderMeta, ParsedItem};
pub(crate) use encoder::{Encodable, Encoder};
pub use header::Header;
pub use identity::BlockIdentity;
pub use offset::BlockOffset;
pub(crate) use trailer::{TRAILER_START_MARKER, Trailer};
pub use transform::{BlockTransform, CompressionContext, EccParams};
pub use r#type::BlockType;
#[cfg(zstd_any)]
use crate::compression::CompressionProvider as _;
use crate::{
Checksum, CompressionType, Slice,
coding::{Decode, Encode},
fs::FsFile,
table::BlockHandle,
};
use alloc::borrow::Cow;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
/// Size ceiling (256 MiB) enforced by this module: write-side inputs and the
/// `uncompressed_length` / `data_length` values decoded from block headers are
/// rejected with `Error::DecompressedSizeTooLarge` when they exceed it
/// (on-disk payload limits additionally allow for the encryption provider's
/// `max_overhead()` where encryption is configured).
const MAX_DECOMPRESSION_SIZE: u32 = 256 * 1024 * 1024;
/// Returns the number of parity bytes expected after a payload of
/// `data_length` bytes for the given ECC scheme.
///
/// - `EccParams::Secded`: one parity byte per 8 payload bytes, rounded up.
/// - `EccParams::Shard`: the payload is split across `data_shards`; the
///   per-shard size is rounded up to an even number of bytes and multiplied
///   by `parity_shards`. A zero `data_length`, `data_shards` or
///   `parity_shards` yields `0`.
///
/// All arithmetic saturates at `u32::MAX` instead of overflowing.
#[inline]
pub(crate) fn expected_parity_len(data_length: u32, params: EccParams) -> u32 {
    match params {
        EccParams::Secded => data_length.div_ceil(8),
        EccParams::Shard {
            data_shards,
            parity_shards,
        } => {
            let shard_count = u32::from(data_shards);
            let parity_count = u32::from(parity_shards);
            if data_length == 0 || shard_count == 0 || parity_count == 0 {
                return 0;
            }
            // Bytes per data shard, rounded up so every payload byte is covered.
            let per_shard = data_length.div_ceil(shard_count);
            // Pad odd shard sizes to the next even size (`& 1` is 1 only when odd).
            let padded = per_shard.saturating_add(per_shard & 1);
            padded.saturating_mul(parity_count)
        }
    }
}
/// Decides whether a block is expected to carry an ECC parity trailer.
///
/// For block types whose header has a flags byte, the `ECC_PARITY` bit of
/// that byte is authoritative; for all other block types the answer comes
/// from `transform.page_ecc()`.
fn block_has_parity(header: &Header, transform: &BlockTransform<'_>) -> bool {
    if !Header::has_block_flags(header.block_type) {
        return transform.page_ecc();
    }
    (header.block_flags & header::block_flags::ECC_PARITY) != 0
}
/// Selects the ECC scheme used to size and decode a block's parity trailer.
///
/// Block types with a header flags byte always use `EccParams::RS_4_2`.
/// Other block types use `transform.ecc_params()`, falling back to
/// `EccParams::RS_4_2` when the transform specifies none.
fn block_ecc_params(header: &Header, transform: &BlockTransform<'_>) -> EccParams {
    if !Header::has_block_flags(header.block_type) {
        if let Some(params) = transform.ecc_params() {
            return params;
        }
    }
    EccParams::RS_4_2
}
/// Maps a compression type to the single tag byte handed to
/// `encrypt_block_aad` on zstd-enabled builds.
#[cfg(zstd_any)]
fn compression_tag_byte(compression: CompressionType) -> u8 {
    const TAG_NONE: u8 = 0;
    #[cfg(feature = "lz4")]
    const TAG_LZ4: u8 = 1;
    const TAG_ZSTD: u8 = 3;
    const TAG_ZSTD_DICT: u8 = 4;

    match compression {
        CompressionType::None => TAG_NONE,
        #[cfg(feature = "lz4")]
        CompressionType::Lz4 => TAG_LZ4,
        CompressionType::Zstd(_) => TAG_ZSTD,
        CompressionType::ZstdDict { .. } => TAG_ZSTD_DICT,
    }
}
/// Encrypts one block payload with `enc`, picking the provider entry point
/// by build configuration.
///
/// `owned` is the already-transformed (e.g. compressed) buffer when one
/// exists; when it is `None`, `borrow` is encrypted instead.
///
/// - With `zstd_any`: the plaintext is only borrowed and passed to
///   `encrypt_block_aad` together with `identity`, the compression tag byte
///   and `block_flags` with the `ECC_PARITY` bit cleared.
/// - Without `zstd_any`: `identity`, `compression` and `block_flags` are
///   ignored; an owned buffer is consumed by `encrypt_vec`, a borrowed one
///   goes through `encrypt`.
///
/// # Errors
/// Propagates whatever error the encryption provider returns.
#[cfg_attr(
zstd_any,
expect(
clippy::needless_pass_by_value,
reason = "owned is consumed by encrypt_vec on the non-zstd path; the AAD \
path only borrows it, so by-value is needed for the other cfg"
)
)]
fn encrypt_block_payload(
enc: &dyn crate::encryption::EncryptionProvider,
owned: Option<Vec<u8>>,
borrow: &[u8],
identity: &BlockIdentity,
compression: CompressionType,
block_flags: u8,
) -> crate::Result<Vec<u8>> {
#[cfg(zstd_any)]
{
// Prefer the owned buffer; fall back to the caller's borrowed bytes.
let plaintext = owned.as_deref().unwrap_or(borrow);
// ECC_PARITY is masked out of the flags passed as AAD input.
// NOTE(review): `prepare_with_flags` only sets that bit on the header
// after encryption has run, which is presumably why it is excluded here.
let aad_block_flags = block_flags & !crate::table::block::header::block_flags::ECC_PARITY;
enc.encrypt_block_aad(
plaintext,
identity,
compression_tag_byte(compression),
aad_block_flags,
)
}
#[cfg(not(zstd_any))]
{
// These inputs only feed the AAD path; silence unused warnings here.
let _ = (identity, compression, block_flags);
match owned {
Some(buf) => enc.encrypt_vec(buf),
None => enc.encrypt(borrow),
}
}
}
/// Decrypts one block payload with `enc`, picking the provider entry point
/// by build configuration.
///
/// - With `zstd_any`: `raw` is borrowed and passed to `decrypt_block_aad`
///   together with `identity`.
/// - Without `zstd_any`: `identity` is ignored and `raw` is consumed by
///   `decrypt_vec`.
///
/// # Errors
/// Propagates whatever error the encryption provider returns.
#[cfg_attr(
zstd_any,
expect(
clippy::needless_pass_by_value,
reason = "raw is consumed by decrypt_vec on the non-zstd path; the AAD path \
only borrows it, so by-value is needed for the other cfg"
)
)]
fn decrypt_block_payload(
enc: &dyn crate::encryption::EncryptionProvider,
raw: Vec<u8>,
identity: &BlockIdentity,
) -> crate::Result<Vec<u8>> {
#[cfg(zstd_any)]
{
enc.decrypt_block_aad(&raw, identity)
}
#[cfg(not(zstd_any))]
{
// `identity` only feeds the AAD path; silence the unused warning here.
let _ = identity;
enc.decrypt_vec(raw)
}
}
fn classify_block_trailer(
has_recognized_ecc: bool,
actual_payload_plus_ecc: usize,
data_length: usize,
ecc_length: u32,
handle: &BlockHandle,
) -> crate::Result<EccStatus> {
let trailer = actual_payload_plus_ecc
.checked_sub(data_length)
.ok_or(crate::Error::InvalidHeader("Block"))?;
if has_recognized_ecc {
if trailer != ecc_length as usize {
return Err(crate::Error::InvalidHeader("Block"));
}
Ok(EccStatus::Ok)
} else if trailer == 0 {
Ok(EccStatus::Ok)
} else {
log::warn!(
"block {handle:?} carries an unrecognized ECC trailer ({trailer} B); \
payload verified by checksum but recovery is unavailable — recompact \
to re-stamp with a supported scheme",
);
Ok(EccStatus::Unrecognized)
}
}
/// A block that has been run through the write-side transform pipeline
/// (see `Block::prepare_with_flags`) but not yet written anywhere.
pub(crate) struct PreparedBlock<'a> {
// Finalized header: lengths, checksum and flags describe `payload`.
header: Header,
// On-disk payload bytes; borrowed from the caller's input when no
// compression or encryption produced a new buffer.
payload: Cow<'a, [u8]>,
// Parity bytes written directly after the payload, if any were generated.
parity: Option<Vec<u8>>,
// Filled only by the zstd `compress_with_layout` path for data blocks;
// empty otherwise.
pub(crate) layout: Vec<u32>,
}
impl PreparedBlock<'_> {
    /// Detaches the block from the input buffer's lifetime by converting the
    /// payload into an owned buffer (a copy is made only if it was borrowed).
    #[cfg(feature = "std")]
    pub(crate) fn into_owned(self) -> PreparedBlock<'static> {
        let Self {
            header,
            payload,
            parity,
            layout,
        } = self;
        PreparedBlock {
            header,
            payload: Cow::Owned(payload.into_owned()),
            parity,
            layout,
        }
    }

    /// Writes header, payload and (if present) parity to `writer`, in that
    /// order, and returns the header that was written.
    ///
    /// `layout` is not written.
    ///
    /// # Errors
    /// Propagates header encoding and I/O errors from `writer`.
    pub(crate) fn write_to<W: std::io::Write>(self, mut writer: &mut W) -> crate::Result<Header> {
        let Self {
            header,
            payload,
            parity,
            layout: _,
        } = self;

        header.encode_into(&mut writer)?;
        writer.write_all(&payload)?;

        // Emit the parity trailer (when there is one) and remember its size
        // for the trace line below.
        let ecc_len = match parity.as_deref() {
            Some(bytes) => {
                writer.write_all(bytes)?;
                bytes.len()
            }
            None => 0,
        };

        log::trace!(
            "Writing block with size {}B (on-disk: {}B, ecc: {}B) (excluding header of {}B)",
            header.uncompressed_length,
            header.data_length,
            ecc_len,
            Header::header_len(header.block_type),
        );
        Ok(header)
    }
}
/// Outcome of the ECC-trailer handling performed while reading a block.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Default)]
pub enum EccStatus {
/// Nothing to report: the trailer was absent or had the expected length
/// and the payload was returned as read. This is the default.
#[default]
Ok,
/// Bytes follow the payload although no recognized ECC scheme applies to
/// the block; the payload is still checksum-verified, but recovery from
/// that trailer is unavailable.
Unrecognized,
/// The payload failed its checksum as read and was repaired from the
/// parity trailer.
Corrected,
}
/// A block as held in memory after reading: its decoded header plus its
/// payload.
#[derive(Clone)]
pub struct Block {
/// Header decoded from the on-disk block.
pub header: Header,
/// Block payload; the readers in this module store it after checksum
/// verification, decryption and decompression.
pub data: Slice,
}
impl Block {
/// Returns the length in bytes of the block's in-memory payload (`data`).
#[must_use]
pub fn size(&self) -> usize {
    let payload: &[u8] = &self.data;
    payload.len()
}
/// Reads `data_length` payload bytes (and, when `ecc_length > 0`, the
/// `ecc_length` parity bytes that follow) from `reader` and verifies the
/// payload against `expected`.
///
/// Returns the payload and a flag that is `true` only when the payload had
/// to be repaired from parity.
///
/// - `ecc_length == 0`: plain checksum check; a mismatch is logged and
///   returned as the error from `Checksum::check`.
/// - `ecc_length > 0` and checksum matches: payload returned untouched.
/// - `ecc_length > 0`, mismatch, `page_ecc` enabled: recovery is attempted
///   with the scheme in `ecc_params` (SEC-DED heal or shard recovery); a
///   recovered buffer is accepted only if it hashes to `expected`, otherwise
///   `Error::PageEccUnrecoverable` is returned.
/// - `ecc_length > 0`, mismatch, `page_ecc` disabled: `Error::ChecksumMismatch`.
///
/// # Errors
/// Also propagates I/O errors from `reader` and errors from `crate::ecc::try_recover`.
fn read_payload_and_verify<R: std::io::Read>(
reader: &mut R,
data_length: u32,
ecc_length: u32,
expected: Checksum,
#[cfg_attr(
not(feature = "page_ecc"),
expect(unused_variables, reason = "recovery scheme only used under page_ecc")
)]
ecc_params: EccParams,
) -> crate::Result<(Vec<u8>, bool)> {
let mut data = vec![0u8; data_length as usize];
reader.read_exact(&mut data)?;
let computed = Checksum::from_raw(crate::hash::hash128(&data));
// No parity trailer: the checksum is the only line of defense.
if ecc_length == 0 {
computed.check(expected).inspect_err(|_| {
log::error!(
"Checksum mismatch for block payload, got={computed}, expected={expected}",
);
})?;
return Ok((data, false));
}
// The parity bytes are always consumed from the reader, even when the
// payload turns out to be intact.
let mut parity = vec![0u8; ecc_length as usize];
reader.read_exact(&mut parity)?;
if computed == expected {
return Ok((data, false));
}
#[cfg(feature = "page_ecc")]
{
let expected_raw = expected.into_u128();
if matches!(ecc_params, crate::table::block::EccParams::Secded) {
// Heal on a copy so the original bytes stay available for logging.
let mut healed = data.clone();
// A correction only counts if the healed bytes hash to the expected checksum.
if crate::secded::try_correct_block(&mut healed, &parity)
== crate::secded::SecdedOutcome::Corrected
&& crate::hash::hash128(&healed) == expected_raw
{
log::warn!(
"recovered block via SEC-DED single-bit heal after \
checksum mismatch (data_len={}, ecc_len={ecc_length})",
data.len(),
);
return Ok((healed, true));
}
log::error!(
"Checksum mismatch on SEC-DED block, heal failed, \
got={computed}, expected={expected}",
);
return Err(crate::Error::PageEccUnrecoverable {
got: computed,
expected,
});
}
// Shard-based scheme: the closure lets the recovery routine validate
// a candidate buffer against the expected checksum.
let (data_shards, parity_shards) = ecc_params.as_shards();
if let Some(recovered) = crate::ecc::try_recover(
&data,
&parity,
data.len(),
data_shards,
parity_shards,
|buf| crate::hash::hash128(buf) == expected_raw,
)? {
log::warn!(
"recovered block from RS parity after checksum mismatch \
(data_len={}, ecc_len={ecc_length})",
data.len(),
);
return Ok((recovered, true));
}
log::error!(
"Checksum mismatch on ECC-protected block, recovery failed, \
got={computed}, expected={expected}",
);
Err(crate::Error::PageEccUnrecoverable {
got: computed,
expected,
})
}
#[cfg(not(feature = "page_ecc"))]
{
// Parity was read to keep the reader position consistent but cannot be used.
let _ = parity;
log::error!(
"block has ECC trailer (ecc_length={ecc_length}) but this \
build lacks the page_ecc feature — cannot attempt recovery; \
got={computed}, expected={expected}",
);
Err(crate::Error::ChecksumMismatch {
expected,
got: computed,
})
}
}
/// Transforms `data` according to `transform` and writes the resulting
/// block (header, payload, optional parity) to `writer`, with no extra
/// header flags.
///
/// Returns the header that was written.
///
/// # Errors
/// Propagates errors from block preparation and from `writer`.
pub fn write_into<W: std::io::Write>(
    writer: &mut W,
    data: &[u8],
    identity: BlockIdentity,
    transform: &BlockTransform<'_>,
) -> crate::Result<Header> {
    let no_extra_flags: u8 = 0;
    Self::prepare_with_flags(data, identity, transform, no_extra_flags)?.write_to(writer)
}
/// Same as [`Block::write_into`], but ORs `extra_flags` into the header's
/// block flags before writing.
///
/// # Errors
/// Propagates errors from block preparation and from `writer`.
pub(crate) fn write_into_with_flags<W: std::io::Write>(
    writer: &mut W,
    data: &[u8],
    identity: BlockIdentity,
    transform: &BlockTransform<'_>,
    extra_flags: u8,
) -> crate::Result<Header> {
    let prepared = Self::prepare_with_flags(data, identity, transform, extra_flags)?;
    prepared.write_to(writer)
}
/// Runs the write-side transform pipeline on `data` and returns the result
/// as a [`PreparedBlock`]; no I/O is performed here.
///
/// Steps, in order: input size check, compression per
/// `transform.compression()`, optional encryption, size check of the final
/// payload, checksum over the final payload, and — with the `page_ecc`
/// feature and `transform.ecc_params()` set — parity generation.
///
/// `extra_flags` is OR-ed into the header's `block_flags`; a debug assertion
/// requires it to contain only bits from `block_flags::KNOWN`.
///
/// # Errors
/// - `DecompressedSizeTooLarge` if `data` exceeds `MAX_DECOMPRESSION_SIZE`,
///   if the transformed payload exceeds the allowed on-disk size, or if the
///   parity length does not fit in a `u32`.
/// - `ZstdDictMismatch` if dictionary compression is requested but the
///   transform has no dictionary or one with a different id.
/// - Errors propagated from the compression, encryption and parity backends.
#[expect(
clippy::too_many_lines,
reason = "linear transform pipeline: compress → encrypt → checksum → ecc; \
each step is small but they share state (header, payload, owned buffers) \
so factoring would just hide the data flow"
)]
pub(crate) fn prepare_with_flags<'a>(
data: &'a [u8],
identity: BlockIdentity,
transform: &BlockTransform<'_>,
extra_flags: u8,
) -> crate::Result<PreparedBlock<'a>> {
let compression = transform.compression();
let encryption = transform.encryption();
#[cfg(zstd_any)]
let zstd_dict = transform.zstd_dict();
let block_type = identity.block_type;
// Refuse inputs a reader would later reject as too large.
if data.len() > MAX_DECOMPRESSION_SIZE as usize {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: data.len() as u64,
limit: u64::from(MAX_DECOMPRESSION_SIZE),
});
}
// Header flags: caller-supplied bits plus COMPRESSED / ENCRYPTED derived
// from the transform. ECC_PARITY is added later, once parity exists.
let block_flags = {
use crate::table::block::header::block_flags;
debug_assert_eq!(
extra_flags & !block_flags::KNOWN,
0,
"extra_flags must contain only defined block_flags bits",
);
let mut f = extra_flags;
if transform.compression() != CompressionType::None {
f |= block_flags::COMPRESSED;
}
if transform.encryption().is_some() {
f |= block_flags::ENCRYPTED;
}
f
};
// Checksum and data_length are placeholders until the final payload is known.
let mut header = Header {
block_type,
block_flags,
checksum: Checksum::from_raw(0), data_length: 0,
#[expect(clippy::cast_possible_truncation, reason = "blocks are limited to u32")]
uncompressed_length: data.len() as u32,
};
#[cfg(any(feature = "lz4", zstd_any))]
let mut compressed_buf: Option<Vec<u8>> = None;
#[cfg_attr(
not(zstd_any),
expect(unused_mut, reason = "`layout` is only mutated on zstd-enabled builds")
)]
let mut layout: Vec<u32> = Vec::new();
// Step 1: compression. `compressed_buf` stays `None` for uncompressed blocks.
match compression {
CompressionType::None => {}
#[cfg(feature = "lz4")]
CompressionType::Lz4 => {
compressed_buf = Some(lz4_flex::compress(data));
}
#[cfg(zstd_any)]
CompressionType::Zstd(level) => {
// Only data blocks request the layout alongside the compressed bytes.
if block_type == BlockType::Data {
let (buf, lay) =
crate::compression::ZstdBackend::compress_with_layout(data, level)?;
compressed_buf = Some(buf);
layout = lay;
} else {
compressed_buf = Some(crate::compression::ZstdBackend::compress(data, level)?);
}
}
#[cfg(zstd_any)]
CompressionType::ZstdDict { level, dict_id } => {
// The transform must carry the exact dictionary the compression type names.
let dict = zstd_dict.ok_or(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: None,
})?;
if dict.id() != dict_id {
return Err(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: Some(dict.id()),
});
}
compressed_buf = Some(crate::compression::ZstdBackend::compress_with_dict(
data,
level,
dict.raw(),
)?);
}
}
// Step 2: encryption (if configured). The compressed buffer, when present,
// is moved into the encryption call; otherwise `data` is encrypted.
let encrypted_buf: Option<Vec<u8>>;
#[cfg(any(feature = "lz4", zstd_any))]
{
encrypted_buf = encryption
.map(|enc| {
encrypt_block_payload(
enc,
compressed_buf.take(),
data,
&identity,
compression,
block_flags,
)
})
.transpose()?;
}
#[cfg(not(any(feature = "lz4", zstd_any)))]
{
encrypted_buf = encryption
.map(|enc| {
encrypt_block_payload(enc, None, data, &identity, compression, block_flags)
})
.transpose()?;
}
// Final payload: encrypted bytes, else compressed bytes, else the input borrowed as-is.
let payload: Cow<'a, [u8]> = if let Some(enc) = encrypted_buf {
Cow::Owned(enc)
} else {
#[cfg(any(feature = "lz4", zstd_any))]
{
compressed_buf.map_or(Cow::Borrowed(data), Cow::Owned)
}
#[cfg(not(any(feature = "lz4", zstd_any)))]
{
Cow::Borrowed(data)
}
};
// The payload may exceed the base limit by the encryption overhead, but
// must still fit the header's u32 `data_length`.
let max_payload = (u64::from(MAX_DECOMPRESSION_SIZE)
+ encryption.map_or(0u64, |enc| u64::from(enc.max_overhead())))
.min(u64::from(u32::MAX));
if payload.len() as u64 > max_payload {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: payload.len() as u64,
limit: max_payload,
});
}
#[expect(clippy::cast_possible_truncation, reason = "bounded by check above")]
let payload_len = payload.len() as u32;
header.data_length = payload_len;
// Step 3: the checksum covers the final (post-compression, post-encryption) payload.
header.checksum = Checksum::from_raw(crate::hash::hash128(&payload));
// Step 4: parity over the final payload, using the transform's scheme.
#[cfg(feature = "page_ecc")]
let parity_buf: Option<Vec<u8>> = if let Some(ecc_params) = transform.ecc_params() {
let p = match ecc_params {
crate::table::block::EccParams::Secded => {
crate::secded::encode_block_parity(&payload)
}
crate::table::block::EccParams::Shard { .. } => {
let (data_shards, parity_shards) = ecc_params.as_shards();
crate::ecc::encode_parity(&payload, data_shards, parity_shards)?
}
};
let p_len =
u32::try_from(p.len()).map_err(|_| crate::Error::DecompressedSizeTooLarge {
declared: p.len() as u64,
limit: u64::from(u32::MAX),
})?;
// The header advertises parity only when parity bytes were actually produced.
if p_len > 0 {
header.block_flags |= crate::table::block::header::block_flags::ECC_PARITY;
}
Some(p)
} else {
None
};
#[cfg(not(feature = "page_ecc"))]
let parity_buf: Option<Vec<u8>> = None;
Ok(PreparedBlock {
header,
payload,
parity: parity_buf,
layout,
})
}
/// Reads one block (header, payload and, if expected, parity trailer) from
/// `reader` and returns it with the payload verified, decrypted and
/// decompressed according to `transform`.
///
/// The header's `data_length` and `uncompressed_length` are bounds-checked
/// before any payload buffer is allocated. Whether parity is expected, and
/// how long it is, is derived from the header and `transform`. Any ECC
/// correction that happened is not reported by this function.
///
/// # Errors
/// - `DecompressedSizeTooLarge` for oversized header lengths.
/// - Checksum / ECC errors from payload verification.
/// - `InvalidHeader("Block")` if an uncompressed payload's length differs
///   from `uncompressed_length`.
/// - `Decompress` if decompression fails or yields an unexpected length.
/// - `ZstdDictMismatch` if the required dictionary is missing or has a
///   different id.
/// - I/O, header decoding and decryption errors are propagated.
#[expect(
clippy::too_many_lines,
reason = "encrypt/no-encrypt branches duplicate compression match — see comment above"
)]
pub fn from_reader<R: std::io::Read>(
reader: &mut R,
identity: BlockIdentity,
transform: &BlockTransform<'_>,
) -> crate::Result<Self> {
let compression = transform.compression();
let encryption = transform.encryption();
#[cfg(zstd_any)]
let zstd_dict = transform.zstd_dict();
let header = Header::decode_from(reader)?;
// Bound both header-declared lengths before allocating anything from them.
let enc_overhead = encryption.map_or(0u64, |e| u64::from(e.max_overhead()));
let max_data_length = u64::from(MAX_DECOMPRESSION_SIZE) + enc_overhead;
if u64::from(header.data_length) > max_data_length {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: u64::from(header.data_length),
limit: max_data_length,
});
}
if header.uncompressed_length > MAX_DECOMPRESSION_SIZE {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: u64::from(header.uncompressed_length),
limit: u64::from(MAX_DECOMPRESSION_SIZE),
});
}
// Zero means "no parity trailer follows the payload".
let ecc_length = if block_has_parity(&header, transform) {
expected_parity_len(header.data_length, block_ecc_params(&header, transform))
} else {
0
};
let data = if let Some(enc) = encryption {
// Encrypted path: verify the ciphertext, decrypt, then decompress.
let (raw_vec, _corrected) = Self::read_payload_and_verify(
reader,
header.data_length,
ecc_length,
header.checksum,
block_ecc_params(&header, transform),
)?;
let decrypted = decrypt_block_payload(enc, raw_vec, &identity)?;
match compression {
CompressionType::None => {
// Without compression the plaintext length must equal the declared length.
#[expect(
clippy::cast_possible_truncation,
reason = "values are u32 length max"
)]
let actual_len = decrypted.len() as u32;
if header.uncompressed_length != actual_len {
return Err(crate::Error::InvalidHeader("Block"));
}
Slice::from(decrypted)
}
#[cfg(feature = "lz4")]
CompressionType::Lz4 => {
let mut buf = vec![0u8; header.uncompressed_length as usize];
let bytes_written = lz4_flex::decompress_into(&decrypted, &mut buf)
.map_err(|_| crate::Error::Decompress(compression))?;
if bytes_written != header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(buf)
}
#[cfg(zstd_any)]
CompressionType::Zstd(_) => {
let decompressed = crate::compression::ZstdBackend::decompress(
&decrypted,
header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
#[cfg(zstd_any)]
CompressionType::ZstdDict { dict_id, .. } => {
let dict = zstd_dict.ok_or(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: None,
})?;
if dict.id() != dict_id {
return Err(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: Some(dict.id()),
});
}
let decompressed = crate::compression::ZstdBackend::decompress_with_dict(
&decrypted,
dict,
header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
}
} else {
// Unencrypted path. Without a parity trailer the payload is read
// straight into a `Slice` and checksum-verified here; with one, the
// shared verify-and-recover routine is used.
let raw_data = if ecc_length == 0 {
let s = Slice::from_reader(reader, header.data_length as usize)?;
let checksum = Checksum::from_raw(crate::hash::hash128(&s));
checksum.check(header.checksum).inspect_err(|_| {
log::error!(
"Checksum mismatch for <bufreader>, got={}, expected={}",
checksum,
header.checksum,
);
})?;
s
} else {
let (payload, _corrected) = Self::read_payload_and_verify(
reader,
header.data_length,
ecc_length,
header.checksum,
block_ecc_params(&header, transform),
)?;
Slice::from(payload)
};
match compression {
CompressionType::None => {
// Without compression the payload length must equal the declared length.
#[expect(
clippy::cast_possible_truncation,
reason = "values are u32 length max"
)]
let actual_len = raw_data.len() as u32;
if header.uncompressed_length != actual_len {
return Err(crate::Error::InvalidHeader("Block"));
}
raw_data
}
#[cfg(feature = "lz4")]
CompressionType::Lz4 => {
let mut buf = vec![0u8; header.uncompressed_length as usize];
let bytes_written = lz4_flex::decompress_into(&raw_data, &mut buf)
.map_err(|_| crate::Error::Decompress(compression))?;
if bytes_written != header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(buf)
}
#[cfg(zstd_any)]
CompressionType::Zstd(_) => {
let decompressed = crate::compression::ZstdBackend::decompress(
&raw_data,
header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
#[cfg(zstd_any)]
CompressionType::ZstdDict { dict_id, .. } => {
let dict = zstd_dict.ok_or(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: None,
})?;
if dict.id() != dict_id {
return Err(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: Some(dict.id()),
});
}
let decompressed = crate::compression::ZstdBackend::decompress_with_dict(
&raw_data,
dict,
header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
}
};
Ok(Self { header, data })
}
/// Reads the block addressed by `handle` from `file`, discarding the ECC
/// status reported by [`Block::from_file_with_status`].
///
/// # Errors
/// Returns whatever `from_file_with_status` returns.
pub fn from_file(
    file: &dyn FsFile,
    handle: BlockHandle,
    identity: BlockIdentity,
    transform: &BlockTransform<'_>,
) -> crate::Result<Self> {
    Self::from_file_with_status(file, handle, identity, transform)
        .map(|(block, _status)| block)
}
/// Reads the block addressed by `handle` from `file`, verifies, decrypts and
/// decompresses it according to `transform`, and reports what happened to
/// its ECC trailer.
///
/// The returned [`EccStatus`] is `Corrected` when the payload had to be
/// repaired from parity; otherwise it is the classification of the bytes
/// following the payload (`Ok` or `Unrecognized`).
///
/// `handle.size()` is bounds-checked against the largest block this
/// configuration could have written before any buffer is allocated.
///
/// # Errors
/// - `DecompressedSizeTooLarge` for an oversized handle or header lengths.
/// - `InvalidHeader("Block")` for a handle too small for a header (encrypted
///   path), a payload/trailer size that does not match the header, or an
///   uncompressed payload whose length differs from `uncompressed_length`.
/// - An `UnexpectedEof` I/O error on a short read (encrypted path).
/// - Checksum / ECC errors from payload verification.
/// - `Decompress` and `ZstdDictMismatch` as in `from_reader`.
/// - I/O, header decoding and decryption errors are propagated.
#[expect(
clippy::too_many_lines,
reason = "encrypt/no-encrypt branches duplicate compression match — see from_reader"
)]
pub fn from_file_with_status(
file: &dyn FsFile,
handle: BlockHandle,
identity: BlockIdentity,
transform: &BlockTransform<'_>,
) -> crate::Result<(Self, EccStatus)> {
let compression = transform.compression();
let encryption = transform.encryption();
#[cfg(zstd_any)]
let zstd_dict = transform.zstd_dict();
// Upper bound for the whole on-disk block: largest payload (+ encryption
// overhead) + largest parity for that payload + largest header.
let enc_overhead = encryption.map_or(0u64, |e| u64::from(e.max_overhead()));
let max_payload = u64::from(MAX_DECOMPRESSION_SIZE) + enc_overhead;
let max_ecc_overhead = match transform.ecc_params() {
Some(params) => {
#[expect(
clippy::cast_possible_truncation,
reason = "max_payload is MAX_DECOMPRESSION_SIZE (+ enc overhead), well below u32::MAX"
)]
let max_payload_u32 = max_payload.min(u64::from(u32::MAX)) as u32;
u64::from(expected_parity_len(max_payload_u32, params))
}
None => 0,
};
let max_on_disk_size = max_payload + max_ecc_overhead + Header::MAX_LEN as u64;
if u64::from(handle.size()) > max_on_disk_size {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: u64::from(handle.size()),
limit: max_on_disk_size,
});
}
let (header, data, ecc_status) = if let Some(enc) = encryption {
// Encrypted path: read the whole block into an owned, mutable buffer
// so the ciphertext can be handed to the decryptor without another copy.
let block_size = handle.size() as usize;
if block_size < Header::MIN_LEN {
return Err(crate::Error::InvalidHeader("Block"));
}
let mut buf = vec![0u8; block_size];
let n = file.read_at(&mut buf, *handle.offset())?;
if n != block_size {
return Err(crate::Error::Io(std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
format!(
"block read_at: expected {block_size} bytes, got {n} at offset {}",
*handle.offset(),
),
)));
}
let parsed_header = Header::decode_from(&mut &buf[..])?;
let header_len = Header::header_len(parsed_header.block_type);
let has_ecc = block_has_parity(&parsed_header, transform);
let ecc_length = if has_ecc {
expected_parity_len(
parsed_header.data_length,
block_ecc_params(&parsed_header, transform),
)
} else {
0
};
// Everything after the header must be payload plus (optionally) parity;
// this also guarantees `data_length` fits inside the buffer.
let actual_payload_plus_ecc = block_size.saturating_sub(header_len);
let actual_data_len = parsed_header.data_length as usize;
let ecc_status = classify_block_trailer(
has_ecc,
actual_payload_plus_ecc,
actual_data_len,
ecc_length,
&handle,
)?;
let max_data_length = u64::from(MAX_DECOMPRESSION_SIZE) + enc_overhead;
if u64::from(parsed_header.data_length) > max_data_length {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: u64::from(parsed_header.data_length),
limit: max_data_length,
});
}
if parsed_header.uncompressed_length > MAX_DECOMPRESSION_SIZE {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: u64::from(parsed_header.uncompressed_length),
limit: u64::from(MAX_DECOMPRESSION_SIZE),
});
}
let (buf, payload_corrected) = if ecc_length == 0 {
#[expect(
clippy::indexing_slicing,
reason = "actual_data_len <= post-header len"
)]
let checksum = Checksum::from_raw(crate::hash::hash128(
&buf[header_len..header_len + actual_data_len],
));
checksum.check(parsed_header.checksum).inspect_err(|_| {
log::error!(
"Checksum mismatch for block {handle:?}, got={}, expected={}",
checksum,
parsed_header.checksum,
);
})?;
// Shift the payload to the front and drop the rest, reusing the
// read buffer as the ciphertext buffer.
buf.copy_within(header_len..header_len + actual_data_len, 0);
buf.truncate(actual_data_len);
(buf, false)
} else {
#[expect(clippy::indexing_slicing, reason = "header was decoded from buf")]
let mut cursor = std::io::Cursor::new(&buf[header_len..]);
Self::read_payload_and_verify(
&mut cursor,
parsed_header.data_length,
ecc_length,
parsed_header.checksum,
block_ecc_params(&parsed_header, transform),
)?
};
// A repaired payload overrides the trailer classification.
let ecc_status = if payload_corrected {
EccStatus::Corrected
} else {
ecc_status
};
let decrypted = decrypt_block_payload(enc, buf, &identity)?;
let data = match compression {
CompressionType::None => {
#[expect(
clippy::cast_possible_truncation,
reason = "values are u32 length max"
)]
let actual_len = decrypted.len() as u32;
if parsed_header.uncompressed_length != actual_len {
return Err(crate::Error::InvalidHeader("Block"));
}
Slice::from(decrypted)
}
#[cfg(feature = "lz4")]
CompressionType::Lz4 => {
let mut decompressed = vec![0u8; parsed_header.uncompressed_length as usize];
let bytes_written = lz4_flex::decompress_into(&decrypted, &mut decompressed)
.map_err(|_| crate::Error::Decompress(compression))?;
if bytes_written != parsed_header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
#[cfg(zstd_any)]
CompressionType::Zstd(_) => {
let decompressed = crate::compression::ZstdBackend::decompress(
&decrypted,
parsed_header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != parsed_header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
#[cfg(zstd_any)]
CompressionType::ZstdDict { dict_id, .. } => {
let dict = zstd_dict.ok_or(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: None,
})?;
if dict.id() != dict_id {
return Err(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: Some(dict.id()),
});
}
let decompressed = crate::compression::ZstdBackend::decompress_with_dict(
&decrypted,
dict,
parsed_header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != parsed_header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
};
(parsed_header, data, ecc_status)
} else {
// Unencrypted path: read the block as a `Slice` so an uncompressed,
// parity-free payload can be returned as a sub-slice of the read buffer.
let buf = crate::file::read_exact(file, *handle.offset(), handle.size() as usize)?;
let parsed_header = Header::decode_from(&mut &buf[..])?;
let header_len = Header::header_len(parsed_header.block_type);
let has_ecc = block_has_parity(&parsed_header, transform);
let ecc_length = if has_ecc {
expected_parity_len(
parsed_header.data_length,
block_ecc_params(&parsed_header, transform),
)
} else {
0
};
// Same trailer validation as the encrypted path; it also guarantees
// `data_length` fits inside the buffer.
let actual_payload_plus_ecc = buf.len().saturating_sub(header_len);
let actual_data_len = parsed_header.data_length as usize;
let ecc_status = classify_block_trailer(
has_ecc,
actual_payload_plus_ecc,
actual_data_len,
ecc_length,
&handle,
)?;
if parsed_header.uncompressed_length > MAX_DECOMPRESSION_SIZE {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: u64::from(parsed_header.uncompressed_length),
limit: u64::from(MAX_DECOMPRESSION_SIZE),
});
}
let (payload_slice, payload_corrected): (Slice, bool) = if ecc_length == 0 {
#[expect(
clippy::indexing_slicing,
reason = "actual_data_len <= post-header len"
)]
let checksum = Checksum::from_raw(crate::hash::hash128(
&buf[header_len..header_len + actual_data_len],
));
checksum.check(parsed_header.checksum).inspect_err(|_| {
log::error!(
"Checksum mismatch for block {handle:?}, got={}, expected={}",
checksum,
parsed_header.checksum,
);
})?;
(buf.slice(header_len..header_len + actual_data_len), false)
} else {
#[expect(clippy::indexing_slicing, reason = "header was decoded from buf")]
let mut cursor = std::io::Cursor::new(&buf[header_len..]);
let (payload, corrected) = Self::read_payload_and_verify(
&mut cursor,
parsed_header.data_length,
ecc_length,
parsed_header.checksum,
block_ecc_params(&parsed_header, transform),
)?;
(Slice::from(payload), corrected)
};
// A repaired payload overrides the trailer classification.
let ecc_status = if payload_corrected {
EccStatus::Corrected
} else {
ecc_status
};
let data = match compression {
CompressionType::None => {
#[expect(
clippy::cast_possible_truncation,
reason = "values are u32 length max"
)]
let actual_len = payload_slice.len() as u32;
if parsed_header.uncompressed_length != actual_len {
return Err(crate::Error::InvalidHeader("Block"));
}
payload_slice
}
#[cfg(feature = "lz4")]
CompressionType::Lz4 => {
let compressed_data: &[u8] = &payload_slice;
let mut decompressed = vec![0u8; parsed_header.uncompressed_length as usize];
let bytes_written =
lz4_flex::decompress_into(compressed_data, &mut decompressed)
.map_err(|_| crate::Error::Decompress(compression))?;
if bytes_written != parsed_header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
#[cfg(zstd_any)]
CompressionType::Zstd(_) => {
let compressed_data: &[u8] = &payload_slice;
let decompressed = crate::compression::ZstdBackend::decompress(
compressed_data,
parsed_header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != parsed_header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
#[cfg(zstd_any)]
CompressionType::ZstdDict { dict_id, .. } => {
let compressed_data: &[u8] = &payload_slice;
let dict = zstd_dict.ok_or(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: None,
})?;
if dict.id() != dict_id {
return Err(crate::Error::ZstdDictMismatch {
expected: dict_id,
got: Some(dict.id()),
});
}
let decompressed = crate::compression::ZstdBackend::decompress_with_dict(
compressed_data,
dict,
parsed_header.uncompressed_length as usize,
)
.map_err(|_| crate::Error::Decompress(compression))?;
if decompressed.len() != parsed_header.uncompressed_length as usize {
return Err(crate::Error::Decompress(compression));
}
Slice::from(decompressed)
}
};
(parsed_header, data, ecc_status)
};
Ok((Self { header, data }, ecc_status))
}
/// Reads the block addressed by `handle` and returns its header together
/// with the verified but still-compressed payload, plus a flag that is
/// `true` when the payload had to be repaired from parity.
///
/// No decompression is performed here; the caller receives the on-disk
/// frame. Encrypted transforms are rejected up front.
///
/// # Errors
/// - An I/O error (`std::io::Error::other`) if `transform` has encryption.
/// - `DecompressedSizeTooLarge` for an oversized handle or an oversized
///   `uncompressed_length` in the header.
/// - `InvalidHeader("Block")` if the payload/trailer size does not match
///   the header.
/// - Checksum / ECC errors from payload verification (a checksum mismatch
///   is logged with the block handle before being returned).
/// - I/O and header decoding errors are propagated.
#[cfg(feature = "zstd")]
pub(crate) fn read_data_frame(
    file: &dyn FsFile,
    handle: BlockHandle,
    transform: &BlockTransform<'_>,
) -> crate::Result<(Header, Slice, bool)> {
    if transform.encryption().is_some() {
        return Err(crate::Error::Io(std::io::Error::other(
            "read_data_frame: encrypted blocks are not supported on the lazy path",
        )));
    }

    // Upper bound for the whole on-disk block: largest payload + largest
    // parity for that payload + largest header.
    let max_ecc_overhead = match transform.ecc_params() {
        Some(params) => u64::from(expected_parity_len(MAX_DECOMPRESSION_SIZE, params)),
        None => 0,
    };
    let max_on_disk_size =
        u64::from(MAX_DECOMPRESSION_SIZE) + max_ecc_overhead + Header::MAX_LEN as u64;
    if u64::from(handle.size()) > max_on_disk_size {
        return Err(crate::Error::DecompressedSizeTooLarge {
            declared: u64::from(handle.size()),
            limit: max_on_disk_size,
        });
    }

    let buf = crate::file::read_exact(file, *handle.offset(), handle.size() as usize)?;
    let parsed_header = Header::decode_from(&mut &buf[..])?;
    if parsed_header.uncompressed_length > MAX_DECOMPRESSION_SIZE {
        return Err(crate::Error::DecompressedSizeTooLarge {
            declared: u64::from(parsed_header.uncompressed_length),
            limit: u64::from(MAX_DECOMPRESSION_SIZE),
        });
    }

    let header_len = Header::header_len(parsed_header.block_type);
    let has_ecc = block_has_parity(&parsed_header, transform);
    let ecc_length = if has_ecc {
        expected_parity_len(
            parsed_header.data_length,
            block_ecc_params(&parsed_header, transform),
        )
    } else {
        0
    };

    // Everything after the header must be payload plus (optionally) parity;
    // this also guarantees `data_length` fits inside the buffer. The
    // resulting status is not part of this function's return value.
    let actual_payload_plus_ecc = buf.len().saturating_sub(header_len);
    let actual_data_len = parsed_header.data_length as usize;
    let _ecc_status = classify_block_trailer(
        has_ecc,
        actual_payload_plus_ecc,
        actual_data_len,
        ecc_length,
        &handle,
    )?;

    let (payload, corrected): (Slice, bool) = if ecc_length == 0 {
        #[expect(
            clippy::indexing_slicing,
            reason = "actual_data_len <= post-header len, checked via classify_block_trailer"
        )]
        let checksum = Checksum::from_raw(crate::hash::hash128(
            &buf[header_len..header_len + actual_data_len],
        ));
        // Log the mismatch with the block handle, matching the other
        // file-based read path, instead of failing silently.
        checksum.check(parsed_header.checksum).inspect_err(|_| {
            log::error!(
                "Checksum mismatch for block {handle:?}, got={}, expected={}",
                checksum,
                parsed_header.checksum,
            );
        })?;
        (buf.slice(header_len..header_len + actual_data_len), false)
    } else {
        #[expect(clippy::indexing_slicing, reason = "header was decoded from buf")]
        let mut cursor = std::io::Cursor::new(&buf[header_len..]);
        let (frame, corrected) = Self::read_payload_and_verify(
            &mut cursor,
            parsed_header.data_length,
            ecc_length,
            parsed_header.checksum,
            block_ecc_params(&parsed_header, transform),
        )?;
        (Slice::from(frame), corrected)
    };

    Ok((parsed_header, payload, corrected))
}
}
#[cfg(test)]
#[allow(
clippy::unwrap_used,
clippy::indexing_slicing,
clippy::useless_vec,
clippy::cast_possible_truncation,
clippy::expect_used,
reason = "test code"
)]
mod tests {
use super::*;
use test_log::test;
// Test fixture: a block written to a file inside a temporary directory.
struct TempBlock {
// Handle to the block file, reopened after writing.
file: std::fs::File,
// Handle addressing the block at offset 0 with its on-disk size.
handle: crate::table::BlockHandle,
// Kept so the temporary directory lives as long as the fixture.
#[cfg_attr(
not(feature = "page_ecc"),
expect(dead_code, reason = "drop guard; only read by page_ecc-gated tests")
)]
dir: tempfile::TempDir,
}
// Writes `data` as a single block into a fresh temporary file and returns
// the reopened file, a handle covering the whole block, and the directory
// guard that keeps the file alive.
fn write_block_to_tempfile(
    data: &[u8],
    identity: BlockIdentity,
    transform: &BlockTransform<'_>,
) -> crate::Result<TempBlock> {
    let dir = tempfile::tempdir()?;
    let path = dir.path().join("block");

    let mut out = std::fs::File::create(&path)?;
    let header = Block::write_into(&mut out, data, identity, transform)?;
    out.sync_all()?;
    // Close the write handle before the file is reopened.
    drop(out);

    let on_disk_size = header.on_disk_size_with(transform.ecc_params());
    let handle = crate::table::BlockHandle::new(BlockOffset(0), on_disk_size);
    let file = std::fs::File::open(&path)?;
    Ok(TempBlock { file, handle, dir })
}
// An uncompressed block written to a file reads back byte-for-byte.
#[test]
fn block_from_file_roundtrip_uncompressed() -> crate::Result<()> {
    let payload = b"abcdefabcdefabcdef";
    let transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )?;

    let tmp = write_block_to_tempfile(
        payload,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    let block = Block::from_file(
        &tmp.file,
        tmp.handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;

    assert_eq!(payload, &*block.data);
    Ok(())
}
// The lazy read path returns the compressed frame, which must be smaller
// than the input and decompress back to it.
#[cfg(feature = "zstd")]
#[test]
fn read_data_frame_returns_decompressible_zstd_frame() -> crate::Result<()> {
    let original: Vec<u8> = (0..40_000u32).map(|i| (i % 64) as u8).collect();
    let transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    let identity = BlockIdentity::for_test(0, BlockType::Data);
    let tmp = write_block_to_tempfile(&original, identity, &transform)?;

    let (header, frame, _corrected) = Block::read_data_frame(&tmp.file, tmp.handle, &transform)?;
    assert!(
        frame.len() < original.len(),
        "frame must be compressed (got {} for {} bytes)",
        frame.len(),
        original.len(),
    );

    let capacity = header.uncompressed_length as usize + 1;
    let roundtripped = crate::compression::ZstdBackend::decompress(&frame, capacity)?;
    assert_eq!(roundtripped, original, "frame must decompress to the original");
    Ok(())
}
// A handle claiming an absurd on-disk size is rejected before any read.
#[cfg(feature = "zstd")]
#[test]
fn read_data_frame_rejects_oversized_handle() -> crate::Result<()> {
    let original: Vec<u8> = (0..1_000u32).map(|i| (i % 64) as u8).collect();
    let transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    let identity = BlockIdentity::for_test(0, BlockType::Data);
    let tmp = write_block_to_tempfile(&original, identity, &transform)?;

    let oversized = BlockHandle::new(tmp.handle.offset(), u32::MAX);
    let outcome = Block::read_data_frame(&tmp.file, oversized, &transform);
    let err = outcome.expect_err("oversized handle must be rejected");
    assert!(
        matches!(err, crate::Error::DecompressedSizeTooLarge { .. }),
        "expected DecompressedSizeTooLarge, got {err:?}",
    );
    Ok(())
}
// An LZ4-compressed block written to a temp file must read back unchanged
// through `Block::from_file`.
#[test]
#[cfg(feature = "lz4")]
fn block_from_file_roundtrip_lz4() -> crate::Result<()> {
    let payload = b"abcdefabcdefabcdef";

    // Write side.
    let write_identity = BlockIdentity::for_test(0, BlockType::Data);
    let write_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    let tmp = write_block_to_tempfile(payload, write_identity, &write_transform)?;

    // Read side, with an independently constructed identity and transform.
    let read_identity = BlockIdentity::for_test(0, BlockType::Data);
    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    let block = Block::from_file(&tmp.file, tmp.handle, read_identity, &read_transform)?;

    assert_eq!(payload, &*block.data);
    Ok(())
}
// A zstd-compressed block written to a temp file must read back unchanged
// through `Block::from_file`.
#[test]
#[cfg(zstd_any)]
fn block_from_file_roundtrip_zstd() -> crate::Result<()> {
    let payload = b"abcdefabcdefabcdef";

    // Write side. The whole test is gated on `zstd_any`, so the dictionary
    // argument is always present here.
    let write_identity = BlockIdentity::for_test(0, BlockType::Data);
    let write_transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    let tmp = write_block_to_tempfile(payload, write_identity, &write_transform)?;

    // Read side, with an independently constructed identity and transform.
    let read_identity = BlockIdentity::for_test(0, BlockType::Data);
    let read_transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    let block = Block::from_file(&tmp.file, tmp.handle, read_identity, &read_transform)?;

    assert_eq!(payload, &*block.data);
    Ok(())
}
// An uncompressed block written into an in-memory buffer must read back
// unchanged through `Block::from_reader`.
#[test]
fn block_roundtrip_uncompressed() -> crate::Result<()> {
    let mut encoded = vec![];

    // Write side.
    let write_identity = BlockIdentity::for_test(0, BlockType::Data);
    let write_transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    Block::write_into(
        &mut encoded,
        b"abcdefabcdefabcdef",
        write_identity,
        &write_transform,
    )?;

    // Read side, with an independently constructed identity and transform.
    let read_identity = BlockIdentity::for_test(0, BlockType::Data);
    let read_transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    let mut remaining = &encoded[..];
    let block = Block::from_reader(&mut remaining, read_identity, &read_transform)?;

    assert_eq!(b"abcdefabcdefabcdef", &*block.data);
    Ok(())
}
// An LZ4-compressed block written into an in-memory buffer must read back
// unchanged through `Block::from_reader`.
#[test]
#[cfg(feature = "lz4")]
fn block_roundtrip_lz4() -> crate::Result<()> {
    let mut encoded = vec![];

    // Write side.
    let write_identity = BlockIdentity::for_test(0, BlockType::Data);
    let write_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    Block::write_into(
        &mut encoded,
        b"abcdefabcdefabcdef",
        write_identity,
        &write_transform,
    )?;

    // Read side, with an independently constructed identity and transform.
    let read_identity = BlockIdentity::for_test(0, BlockType::Data);
    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    let mut remaining = &encoded[..];
    let block = Block::from_reader(&mut remaining, read_identity, &read_transform)?;

    assert_eq!(b"abcdefabcdefabcdef", &*block.data);
    Ok(())
}
// A header whose `uncompressed_length` was rewritten to `u32::MAX` must make
// `Block::from_reader` fail with `DecompressedSizeTooLarge`.
#[test]
#[cfg(feature = "lz4")]
fn block_reject_absurd_uncompressed_length() {
    use crate::coding::Encode;

    // Produce a valid LZ4 block in memory.
    let mut encoded = vec![];
    let write_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    Block::write_into(
        &mut encoded,
        b"hello",
        BlockIdentity::for_test(0, BlockType::Data),
        &write_transform,
    )
    .unwrap();

    // Split off the header, tamper with it, and glue the untouched remainder back on.
    let mut remainder = &encoded[..];
    let mut header = Header::decode_from(&mut remainder).unwrap();
    header.uncompressed_length = u32::MAX;
    let mut tampered = header.encode_into_vec();
    tampered.extend_from_slice(remainder);

    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let mut input = &tampered[..];
    let result = Block::from_reader(
        &mut input,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    let rejected = matches!(&result, Err(crate::Error::DecompressedSizeTooLarge { .. }));
    assert!(
        rejected,
        "expected DecompressedSizeTooLarge, got: {:?}",
        result.err(),
    );
}
// A header whose `uncompressed_length` was rewritten to 0 while the payload
// is left in place must make `Block::from_reader` fail with `Decompress`.
#[test]
#[cfg(feature = "lz4")]
fn block_zero_uncompressed_length_with_data_fails_decompress() {
    use crate::coding::Encode;

    // Produce a valid LZ4 block in memory.
    let mut encoded = vec![];
    let write_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    Block::write_into(
        &mut encoded,
        b"hello",
        BlockIdentity::for_test(0, BlockType::Data),
        &write_transform,
    )
    .unwrap();

    // Split off the header, tamper with it, and glue the untouched remainder back on.
    let mut remainder = &encoded[..];
    let mut header = Header::decode_from(&mut remainder).unwrap();
    header.uncompressed_length = 0;
    let mut tampered = header.encode_into_vec();
    tampered.extend_from_slice(remainder);

    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let mut input = &tampered[..];
    let result = Block::from_reader(
        &mut input,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    let failed_decompress = matches!(&result, Err(crate::Error::Decompress(_)));
    assert!(
        failed_decompress,
        "expected Decompress error, got: {:?}",
        result.err(),
    );
}
// A hand-built LZ4 block whose header claims one byte more than the real
// uncompressed length must fail with `Error::Decompress(Lz4)`.
#[test]
#[cfg(feature = "lz4")]
fn lz4_corrupted_uncompressed_length_triggers_decompress_error() {
    use crate::coding::Encode;
    use std::io::Cursor;

    let plain: &[u8] = b"hello world";
    let compressed = lz4_flex::compress(plain);

    // Checksum and data length match the compressed bytes; only the
    // uncompressed length is off by one.
    let header = Header {
        data_length: compressed.len() as u32,
        uncompressed_length: plain.len() as u32 + 1,
        checksum: Checksum::from_raw(crate::hash::hash128(&compressed)),
        ..Header::test_dummy(BlockType::Data)
    };

    let mut bytes = header.encode_into_vec();
    bytes.extend_from_slice(&compressed);
    let mut cursor = Cursor::new(bytes);

    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let outcome = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    match outcome {
        Ok(_) => panic!("expected Error::Decompress, but got Ok(Block)"),
        Err(crate::Error::Decompress(CompressionType::Lz4)) => {}
        Err(other) => panic!("expected Error::Decompress, got different error: {other:?}"),
    }
}
// File-based variant: a header whose `uncompressed_length` was rewritten to
// `u32::MAX` must make `Block::from_file` fail with `DecompressedSizeTooLarge`.
#[test]
#[cfg(feature = "lz4")]
fn block_from_file_reject_absurd_uncompressed_length() {
    use crate::coding::Encode;
    use std::io::Write;

    // Produce a valid LZ4 block in memory.
    let mut encoded = vec![];
    let write_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    Block::write_into(
        &mut encoded,
        b"hello",
        BlockIdentity::for_test(0, BlockType::Data),
        &write_transform,
    )
    .unwrap();

    // Split off the header, tamper with it, and glue the untouched remainder back on.
    let mut remainder = &encoded[..];
    let mut header = Header::decode_from(&mut remainder).unwrap();
    header.uncompressed_length = u32::MAX;
    let mut tampered = header.encode_into_vec();
    tampered.extend_from_slice(remainder);

    // Persist the tampered bytes and reopen them for reading.
    let mut tmp = tempfile::NamedTempFile::new().unwrap();
    tmp.write_all(&tampered).unwrap();
    tmp.flush().unwrap();
    let file = std::fs::File::open(tmp.path()).unwrap();
    let handle = crate::table::BlockHandle::new(BlockOffset(0), tampered.len() as u32);

    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let result = Block::from_file(
        &file,
        handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    let rejected = matches!(&result, Err(crate::Error::DecompressedSizeTooLarge { .. }));
    assert!(
        rejected,
        "expected DecompressedSizeTooLarge, got: {:?}",
        result.err(),
    );
}
// File-based variant: a header whose `uncompressed_length` was rewritten to 0
// while the payload is left in place must make `Block::from_file` fail with
// `Decompress`.
#[test]
#[cfg(feature = "lz4")]
fn block_from_file_zero_uncompressed_length_with_data_fails_decompress() {
    use crate::coding::Encode;
    use std::io::Write;

    // Produce a valid LZ4 block in memory.
    let mut encoded = vec![];
    let write_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    Block::write_into(
        &mut encoded,
        b"hello",
        BlockIdentity::for_test(0, BlockType::Data),
        &write_transform,
    )
    .unwrap();

    // Split off the header, tamper with it, and glue the untouched remainder back on.
    let mut remainder = &encoded[..];
    let mut header = Header::decode_from(&mut remainder).unwrap();
    header.uncompressed_length = 0;
    let mut tampered = header.encode_into_vec();
    tampered.extend_from_slice(remainder);

    // Persist the tampered bytes and reopen them for reading.
    let mut tmp = tempfile::NamedTempFile::new().unwrap();
    tmp.write_all(&tampered).unwrap();
    tmp.flush().unwrap();
    let file = std::fs::File::open(tmp.path()).unwrap();
    let handle = crate::table::BlockHandle::new(BlockOffset(0), tampered.len() as u32);

    let read_transform = BlockTransform::from_parts(
        CompressionType::Lz4,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let result = Block::from_file(
        &file,
        handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    let failed_decompress = matches!(&result, Err(crate::Error::Decompress(_)));
    assert!(
        failed_decompress,
        "expected Decompress error, got: {:?}",
        result.err(),
    );
}
// A header whose `data_length` was rewritten to `MAX_DECOMPRESSION_SIZE + 1`
// must make `Block::from_reader` fail with `DecompressedSizeTooLarge`.
#[test]
fn block_from_reader_reject_absurd_data_length() {
    use crate::coding::Encode;

    // Produce a valid uncompressed block in memory.
    let mut encoded = vec![];
    let write_transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    Block::write_into(
        &mut encoded,
        b"hello",
        BlockIdentity::for_test(0, BlockType::Data),
        &write_transform,
    )
    .unwrap();

    // Split off the header, tamper with it, and glue the untouched remainder back on.
    let mut remainder = &encoded[..];
    let mut header = Header::decode_from(&mut remainder).unwrap();
    header.data_length = MAX_DECOMPRESSION_SIZE + 1;
    let mut tampered = header.encode_into_vec();
    tampered.extend_from_slice(remainder);

    let read_transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let mut input = &tampered[..];
    let result = Block::from_reader(
        &mut input,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    let rejected = matches!(&result, Err(crate::Error::DecompressedSizeTooLarge { .. }));
    assert!(
        rejected,
        "expected DecompressedSizeTooLarge, got: {:?}",
        result.err(),
    );
}
// A handle whose size is `u32::MAX` must make `Block::from_file` fail with
// `DecompressedSizeTooLarge`, even though the file only holds five bytes.
#[test]
fn block_from_file_reject_oversized_handle() {
    use std::io::Write;

    let mut tmp = tempfile::NamedTempFile::new().unwrap();
    tmp.write_all(b"dummy").unwrap();
    tmp.flush().unwrap();
    let file = std::fs::File::open(tmp.path()).unwrap();

    let oversized = crate::table::BlockHandle::new(BlockOffset(0), u32::MAX);
    let read_transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();
    let result = Block::from_file(
        &file,
        oversized,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    let rejected = matches!(&result, Err(crate::Error::DecompressedSizeTooLarge { .. }));
    assert!(
        rejected,
        "expected DecompressedSizeTooLarge, got: {:?}",
        result.err(),
    );
}
// A hand-built zstd block whose header claims one byte more than the real
// uncompressed length must fail with `Error::Decompress(Zstd(_))`.
#[test]
#[cfg(zstd_any)]
fn zstd_corrupted_uncompressed_length_triggers_decompress_error() {
    use crate::coding::Encode;
    use std::io::Cursor;

    let plain: &[u8] = b"hello world";
    let compressed =
        crate::compression::ZstdBackend::compress(plain, 3).expect("zstd compress failed");

    // Checksum and data length match the compressed bytes; only the
    // uncompressed length is off by one.
    let header = Header {
        data_length: compressed.len() as u32,
        uncompressed_length: plain.len() as u32 + 1,
        checksum: Checksum::from_raw(crate::hash::hash128(&compressed)),
        ..Header::test_dummy(BlockType::Data)
    };

    let mut bytes = header.encode_into_vec();
    bytes.extend_from_slice(&compressed);
    let mut cursor = Cursor::new(bytes);

    let read_transform =
        BlockTransform::from_parts(CompressionType::Zstd(3), None, None).unwrap();
    let outcome = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    match outcome {
        Ok(_) => panic!("expected Error::Decompress, but got Ok(Block)"),
        Err(crate::Error::Decompress(CompressionType::Zstd(_))) => {}
        Err(other) => panic!("expected Error::Decompress, got different error: {other:?}"),
    }
}
// A hand-built zstd block whose header claims one byte less than the real
// uncompressed length must fail with `Error::Decompress(Zstd(_))`.
#[test]
#[cfg(zstd_any)]
fn zstd_decreased_uncompressed_length_triggers_decompress_error() {
    use crate::coding::Encode;
    use std::io::Cursor;

    let plain: &[u8] = b"hello world hello world hello world";
    let compressed =
        crate::compression::ZstdBackend::compress(plain, 3).expect("zstd compress failed");

    // Checksum and data length match the compressed bytes; only the
    // uncompressed length is one short.
    let header = Header {
        data_length: compressed.len() as u32,
        uncompressed_length: plain.len() as u32 - 1,
        checksum: Checksum::from_raw(crate::hash::hash128(&compressed)),
        ..Header::test_dummy(BlockType::Data)
    };

    let mut bytes = header.encode_into_vec();
    bytes.extend_from_slice(&compressed);
    let mut cursor = Cursor::new(bytes);

    let read_transform =
        BlockTransform::from_parts(CompressionType::Zstd(3), None, None).unwrap();
    let outcome = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    );

    match outcome {
        Ok(_) => panic!("expected Error::Decompress, but got Ok(Block)"),
        Err(crate::Error::Decompress(CompressionType::Zstd(_))) => {}
        Err(other) => panic!("expected Error::Decompress, got different error: {other:?}"),
    }
}
// A zstd-compressed block written into an in-memory buffer must read back
// unchanged through `Block::from_reader`.
#[test]
#[cfg(zstd_any)]
fn block_roundtrip_zstd() -> crate::Result<()> {
    let mut encoded = vec![];

    // Write side. The whole test is gated on `zstd_any`, so the dictionary
    // argument is always present here.
    let write_identity = BlockIdentity::for_test(0, BlockType::Data);
    let write_transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    Block::write_into(
        &mut encoded,
        b"abcdefabcdefabcdef",
        write_identity,
        &write_transform,
    )?;

    // Read side, with an independently constructed identity and transform.
    let read_identity = BlockIdentity::for_test(0, BlockType::Data);
    let read_transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    let mut remaining = &encoded[..];
    let block = Block::from_reader(&mut remaining, read_identity, &read_transform)?;

    assert_eq!(b"abcdefabcdefabcdef", &*block.data);
    Ok(())
}
// Writing a payload one byte larger than `MAX_DECOMPRESSION_SIZE` must fail
// with `DecompressedSizeTooLarge`.
#[test]
fn block_write_rejects_oversized_payload() {
    let payload_len = MAX_DECOMPRESSION_SIZE as usize + 1;
    let oversized = vec![0u8; payload_len];

    let transform = BlockTransform::from_parts(
        CompressionType::None,
        None,
        #[cfg(zstd_any)]
        None,
    )
    .unwrap();

    // The output is discarded; only the returned result matters.
    let mut sink = std::io::sink();
    let result = Block::write_into(
        &mut sink,
        &oversized,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    );

    assert!(
        matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })),
        "expected DecompressedSizeTooLarge, got: {result:?}",
    );
}
// A 64 KiB run of one repeated byte must shrink under zstd and read back
// unchanged through `Block::from_reader`.
#[test]
#[cfg(zstd_any)]
fn block_roundtrip_zstd_large_data() -> crate::Result<()> {
    let original = vec![0xABu8; 64 * 1024];
    let mut encoded = vec![];

    // Write side. The whole test is gated on `zstd_any`, so the dictionary
    // argument is always present here.
    let write_identity = BlockIdentity::for_test(0, BlockType::Data);
    let write_transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    Block::write_into(&mut encoded, &original, write_identity, &write_transform)?;

    let shrank = encoded.len() < original.len();
    assert!(shrank, "zstd should compress repeated data");

    // Read side, with an independently constructed identity and transform.
    let read_identity = BlockIdentity::for_test(0, BlockType::Data);
    let read_transform = BlockTransform::from_parts(CompressionType::Zstd(3), None, None)?;
    let mut remaining = &encoded[..];
    let block = Block::from_reader(&mut remaining, read_identity, &read_transform)?;

    assert_eq!(&*block.data, &original[..]);
    Ok(())
}
// Block round-trip and failure tests with an AES-256-GCM provider attached to
// the block transform.
#[cfg(feature = "encryption")]
mod encrypted {
    use crate::table::block::*;

    // Provider built from a fixed 32-byte key of 0x42.
    fn test_provider() -> crate::encryption::Aes256GcmProvider {
        let key = [0x42; 32];
        crate::encryption::Aes256GcmProvider::new(&key)
    }

    // Identity used for every block in this module.
    fn data_block_identity() -> BlockIdentity {
        BlockIdentity::for_test(0, BlockType::Data)
    }

    // Writes `data` into an in-memory buffer with `compression` plus the test
    // provider, then decodes it with `Block::from_reader` using a separately
    // constructed transform. Returns the decoded block.
    fn reader_roundtrip(compression: CompressionType, data: &[u8]) -> crate::Result<Block> {
        let enc = test_provider();

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(
            compression,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        Block::write_into(&mut encoded, data, data_block_identity(), &write_transform)?;

        let read_transform = BlockTransform::from_parts(
            compression,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        let mut remaining = &encoded[..];
        let block = Block::from_reader(&mut remaining, data_block_identity(), &read_transform)?;
        Ok(block)
    }

    // Same as `reader_roundtrip`, but goes through a temp file and
    // `Block::from_file`.
    fn file_roundtrip(compression: CompressionType, data: &[u8]) -> crate::Result<Block> {
        let enc = test_provider();

        let write_transform = BlockTransform::from_parts(
            compression,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        let tmp = super::write_block_to_tempfile(data, data_block_identity(), &write_transform)?;

        let read_transform = BlockTransform::from_parts(
            compression,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        let block = Block::from_file(
            &tmp.file,
            tmp.handle,
            data_block_identity(),
            &read_transform,
        )?;
        Ok(block)
    }

    #[test]
    fn block_roundtrip_encrypted_uncompressed() -> crate::Result<()> {
        let data = b"plaintext block data for encryption test";
        let block = reader_roundtrip(CompressionType::None, data)?;
        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    #[cfg(feature = "lz4")]
    fn block_roundtrip_encrypted_lz4() -> crate::Result<()> {
        let data = b"abcdefabcdefabcdef";
        let block = reader_roundtrip(CompressionType::Lz4, data)?;
        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    #[cfg(zstd_any)]
    fn block_roundtrip_encrypted_zstd() -> crate::Result<()> {
        let data = b"abcdefabcdefabcdef";
        let block = reader_roundtrip(CompressionType::Zstd(3), data)?;
        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    fn block_from_file_encrypted_uncompressed() -> crate::Result<()> {
        let data = b"plaintext block data for from_file encryption test";
        let block = file_roundtrip(CompressionType::None, data)?;
        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    #[cfg(feature = "lz4")]
    fn block_from_file_encrypted_lz4() -> crate::Result<()> {
        let data = b"abcdefabcdefabcdef";
        let block = file_roundtrip(CompressionType::Lz4, data)?;
        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    #[cfg(zstd_any)]
    fn block_from_file_encrypted_zstd() -> crate::Result<()> {
        let data = b"abcdefabcdefabcdef";
        let block = file_roundtrip(CompressionType::Zstd(3), data)?;
        assert_eq!(data, &*block.data);
        Ok(())
    }

    // Reading a file-backed block with a provider built from a different key
    // must fail with `Decrypt`.
    #[test]
    fn block_from_file_encrypted_wrong_key_fails() -> crate::Result<()> {
        let write_key = test_provider();
        let read_key = crate::encryption::Aes256GcmProvider::new(&[0x99; 32]);
        let data = b"encrypted block data";

        let write_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&write_key),
            #[cfg(zstd_any)]
            None,
        )?;
        let tmp = super::write_block_to_tempfile(data, data_block_identity(), &write_transform)?;

        let read_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&read_key),
            #[cfg(zstd_any)]
            None,
        )?;
        let result = Block::from_file(
            &tmp.file,
            tmp.handle,
            data_block_identity(),
            &read_transform,
        );

        let decrypt_failed = matches!(result, Err(crate::Error::Decrypt(_)));
        assert!(
            decrypt_failed,
            "expected Decrypt error for wrong key, got: {:?}",
            result.err(),
        );
        Ok(())
    }

    // Reading an in-memory block with a provider built from a different key
    // must fail with `Decrypt`.
    #[test]
    fn block_from_reader_encrypted_wrong_key_fails() -> crate::Result<()> {
        let write_key = test_provider();
        let read_key = crate::encryption::Aes256GcmProvider::new(&[0x99; 32]);
        let data = b"encrypted block data";

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&write_key),
            #[cfg(zstd_any)]
            None,
        )?;
        Block::write_into(&mut encoded, data, data_block_identity(), &write_transform)?;

        let read_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&read_key),
            #[cfg(zstd_any)]
            None,
        )?;
        let mut remaining = &encoded[..];
        let result = Block::from_reader(&mut remaining, data_block_identity(), &read_transform);

        let decrypt_failed = matches!(result, Err(crate::Error::Decrypt(_)));
        assert!(
            decrypt_failed,
            "expected Decrypt error for wrong key, got: {:?}",
            result.err(),
        );
        Ok(())
    }

    // Flipping one byte after the header of an encrypted block must surface
    // as `ChecksumMismatch` when read from a file.
    #[test]
    fn block_from_file_encrypted_checksum_tamper_detected() -> crate::Result<()> {
        use std::io::Write;

        let enc = test_provider();
        let data = b"data for tamper test";

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        let header = Block::write_into(&mut encoded, data, data_block_identity(), &write_transform)?;

        // Invert the byte at `Header::MIN_LEN + 1`; nothing happens if the
        // buffer is shorter than that.
        if let Some(byte) = encoded.get_mut(Header::MIN_LEN + 1) {
            *byte ^= 0xFF;
        }

        let dir = tempfile::tempdir()?;
        let path = dir.path().join("block");
        {
            let mut out = std::fs::File::create(&path)?;
            out.write_all(&encoded)?;
            out.sync_all()?;
        }
        let file = std::fs::File::open(&path)?;
        let handle = crate::table::BlockHandle::new(BlockOffset(0), header.on_disk_size());

        let read_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        let result = Block::from_file(&file, handle, data_block_identity(), &read_transform);

        let mismatch = matches!(result, Err(crate::Error::ChecksumMismatch { .. }));
        assert!(
            mismatch,
            "expected ChecksumMismatch for tampered data, got: {:?}",
            result.err(),
        );
        Ok(())
    }

    // A two-byte handle over a four-byte file must be rejected with
    // `InvalidHeader`.
    #[test]
    fn block_from_file_encrypted_undersized_handle_rejected() -> crate::Result<()> {
        use std::io::Write;

        let enc = test_provider();

        let dir = tempfile::tempdir()?;
        let path = dir.path().join("block");
        {
            let mut out = std::fs::File::create(&path)?;
            out.write_all(b"tiny")?;
            out.sync_all()?;
        }
        let file = std::fs::File::open(&path)?;
        let handle = crate::table::BlockHandle::new(BlockOffset(0), 2);

        let read_transform = BlockTransform::from_parts(
            CompressionType::None,
            Some(&enc),
            #[cfg(zstd_any)]
            None,
        )?;
        let result = Block::from_file(&file, handle, data_block_identity(), &read_transform);

        let invalid_header = matches!(result, Err(crate::Error::InvalidHeader(_)));
        assert!(
            invalid_header,
            "expected InvalidHeader for undersized handle, got: {:?}",
            result.err(),
        );
        Ok(())
    }

    #[test]
    fn block_from_file_encrypted_uncompressed_large_payload() -> crate::Result<()> {
        let data = vec![0xBB_u8; 32 * 1024];
        let block = file_roundtrip(CompressionType::None, &data)?;
        assert_eq!(&*block.data, &data[..]);
        Ok(())
    }

    #[test]
    fn block_roundtrip_encrypted_uncompressed_large() -> crate::Result<()> {
        let data = vec![0xCC_u8; 32 * 1024];
        let block = reader_roundtrip(CompressionType::None, &data)?;
        assert_eq!(&*block.data, &data[..]);
        Ok(())
    }

    #[test]
    #[cfg(feature = "lz4")]
    fn block_roundtrip_encrypted_lz4_large() -> crate::Result<()> {
        let data = vec![0xDD_u8; 32 * 1024];
        let block = reader_roundtrip(CompressionType::Lz4, &data)?;
        assert_eq!(&*block.data, &data[..]);
        Ok(())
    }

    #[test]
    #[cfg(zstd_any)]
    fn block_roundtrip_encrypted_zstd_large() -> crate::Result<()> {
        let data = vec![0xEE_u8; 32 * 1024];
        let block = reader_roundtrip(CompressionType::Zstd(3), &data)?;
        assert_eq!(&*block.data, &data[..]);
        Ok(())
    }
}
// Block tests for zstd dictionary compression. The module is gated on the
// `zstd` feature, so the dictionary argument of `BlockTransform::from_parts`
// is passed unconditionally throughout.
#[cfg(feature = "zstd")]
mod zstd_dict {
    use super::*;
    use crate::compression::ZstdDictionary;
    use test_log::test;

    // Dictionary built from 500 concatenated "key-NNNNNval-NNNNN" samples.
    fn test_dict() -> ZstdDictionary {
        let samples: Vec<u8> = (0u32..500)
            .flat_map(|i| format!("key-{i:05}val-{i:05}").into_bytes())
            .collect();
        ZstdDictionary::new(&samples)
    }

    // Level-3 dictionary compression keyed to `dict`'s id.
    fn test_compression(dict: &ZstdDictionary) -> CompressionType {
        let dict_id = dict.id();
        CompressionType::ZstdDict { level: 3, dict_id }
    }

    // Identity used for every block in this module.
    fn data_block_identity() -> BlockIdentity {
        BlockIdentity::for_test(0, BlockType::Data)
    }

    #[test]
    fn block_roundtrip_zstd_dict_reader() -> crate::Result<()> {
        let dict = test_dict();
        let compression = test_compression(&dict);
        let data = b"abcdefabcdefabcdef";

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(compression, None, Some(&dict))?;
        Block::write_into(&mut encoded, data, data_block_identity(), &write_transform)?;

        let read_transform = BlockTransform::from_parts(compression, None, Some(&dict))?;
        let mut remaining = &encoded[..];
        let block = Block::from_reader(&mut remaining, data_block_identity(), &read_transform)?;

        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    fn block_roundtrip_zstd_dict_file() -> crate::Result<()> {
        use std::io::Write;

        let dict = test_dict();
        let compression = test_compression(&dict);
        let data = b"abcdefabcdefabcdef";

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(compression, None, Some(&dict))?;
        let header = Block::write_into(&mut encoded, data, data_block_identity(), &write_transform)?;

        // Persist the encoded block and reopen it for reading.
        let dir = tempfile::tempdir()?;
        let path = dir.path().join("block");
        {
            let mut out = std::fs::File::create(&path)?;
            out.write_all(&encoded)?;
            out.sync_all()?;
        }
        let file = std::fs::File::open(&path)?;
        let handle = crate::table::BlockHandle::new(BlockOffset(0), header.on_disk_size());

        let read_transform = BlockTransform::from_parts(compression, None, Some(&dict))?;
        let block = Block::from_file(&file, handle, data_block_identity(), &read_transform)?;

        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    fn block_roundtrip_zstd_dict_large_data() -> crate::Result<()> {
        let dict = test_dict();
        let compression = test_compression(&dict);
        let data = vec![0xAB_u8; 64 * 1024];

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(compression, None, Some(&dict))?;
        Block::write_into(&mut encoded, &data, data_block_identity(), &write_transform)?;

        let shrank = encoded.len() < data.len();
        assert!(shrank, "dict compression should reduce size");

        let read_transform = BlockTransform::from_parts(compression, None, Some(&dict))?;
        let mut remaining = &encoded[..];
        let block = Block::from_reader(&mut remaining, data_block_identity(), &read_transform)?;

        assert_eq!(&*block.data, &data[..]);
        Ok(())
    }

    // Pairing a compression type with a dictionary built from other bytes
    // must be rejected by `from_parts` with `got: Some(_)`.
    #[test]
    fn block_zstd_dict_wrong_dict_returns_error() {
        let dict = test_dict();
        let compression = test_compression(&dict);
        let wrong_dict = ZstdDictionary::new(b"completely different dictionary bytes");

        let result = BlockTransform::from_parts(compression, None, Some(&wrong_dict));

        let mismatch = matches!(
            result,
            Err(crate::Error::ZstdDictMismatch { got: Some(_), .. })
        );
        assert!(mismatch, "expected ZstdDictMismatch with got=Some");
    }

    // Dictionary compression with no dictionary supplied must be rejected by
    // `from_parts` with `got: None`.
    #[test]
    fn block_transform_from_parts_zstd_dict_missing_returns_error() {
        let dict = test_dict();
        let compression = test_compression(&dict);

        let result = BlockTransform::from_parts(compression, None, None);

        let mismatch = matches!(
            &result,
            Err(crate::Error::ZstdDictMismatch { got: None, .. })
        );
        assert!(
            mismatch,
            "expected ZstdDictMismatch, got: {:?}",
            result.as_ref().err(),
        );
    }

    #[test]
    #[cfg(feature = "encryption")]
    fn block_roundtrip_zstd_dict_encrypted_reader() -> crate::Result<()> {
        let enc = crate::Aes256GcmProvider::new(&[0x42; 32]);
        let dict = test_dict();
        let compression = test_compression(&dict);
        let data = b"encrypted-dict-compressed-data-for-test";

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(compression, Some(&enc), Some(&dict))?;
        Block::write_into(&mut encoded, data, data_block_identity(), &write_transform)?;

        let read_transform = BlockTransform::from_parts(compression, Some(&enc), Some(&dict))?;
        let mut remaining = &encoded[..];
        let block = Block::from_reader(&mut remaining, data_block_identity(), &read_transform)?;

        assert_eq!(data, &*block.data);
        Ok(())
    }

    #[test]
    #[cfg(feature = "encryption")]
    fn block_roundtrip_zstd_dict_encrypted_file() -> crate::Result<()> {
        use std::io::Write;

        let enc = crate::Aes256GcmProvider::new(&[0x42; 32]);
        let dict = test_dict();
        let compression = test_compression(&dict);
        let data = vec![0xCC_u8; 16 * 1024];

        let mut encoded = vec![];
        let write_transform = BlockTransform::from_parts(compression, Some(&enc), Some(&dict))?;
        let header = Block::write_into(&mut encoded, &data, data_block_identity(), &write_transform)?;

        // Persist the encoded block and reopen it for reading.
        let dir = tempfile::tempdir()?;
        let path = dir.path().join("block");
        {
            let mut out = std::fs::File::create(&path)?;
            out.write_all(&encoded)?;
            out.sync_all()?;
        }
        let file = std::fs::File::open(&path)?;
        let handle = crate::table::BlockHandle::new(BlockOffset(0), header.on_disk_size());

        let read_transform = BlockTransform::from_parts(compression, Some(&enc), Some(&dict))?;
        let block = Block::from_file(&file, handle, data_block_identity(), &read_transform)?;

        assert_eq!(&*block.data, &data[..]);
        Ok(())
    }
}
#[cfg(feature = "page_ecc")]
mod page_ecc {
use super::*;
use test_log::test;
/// Plaintext payload shared by the tests in this module.
///
/// The trailing `\` line continuations join the three source lines into one
/// contiguous byte string (no embedded newlines).
const PAYLOAD: &[u8] = b"the quick brown fox jumps over the lazy dog \
0123456789 the quick brown fox jumps over \
the lazy dog 0123456789";
/// Writes `PAYLOAD` with `BlockTransform::PlainEcc(EccParams::RS_4_2)` and reads
/// it back untouched.
///
/// Verifies that the returned header has the `ECC_PARITY` flag set, that the
/// number of bytes written equals `header.on_disk_size()`, and that the decoded
/// data equals `PAYLOAD`.
#[test]
fn block_roundtrip_plain_ecc_clean_read() -> crate::Result<()> {
    use crate::table::block::header::block_flags;

    let transform = BlockTransform::PlainEcc(EccParams::RS_4_2);
    let mut encoded = vec![];
    let written = Block::write_into(
        &mut encoded,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;

    let parity_bit = written.block_flags & block_flags::ECC_PARITY;
    assert!(
        parity_bit != 0,
        "PlainEcc writer must set the ECC_PARITY flag",
    );
    assert_eq!(
        encoded.len(),
        written.on_disk_size() as usize,
        "on-disk size must equal header + payload + derived parity length",
    );

    let mut cursor = &encoded[..];
    let block = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(&*block.data, PAYLOAD);
    Ok(())
}
/// Writes `PAYLOAD` with RS(4,2) parity, inverts every bit of one byte in the
/// middle of the stored payload, and expects `Block::from_reader` to still
/// return the original bytes.
#[test]
fn block_roundtrip_plain_ecc_recovers_from_single_byte_flip() -> crate::Result<()> {
    let transform = BlockTransform::PlainEcc(EccParams::RS_4_2);
    let mut encoded = vec![];
    let written = Block::write_into(
        &mut encoded,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;

    // NOTE(review): assumes the stored payload begins `Header::MIN_LEN` bytes
    // into the buffer; the target is the byte halfway through the payload.
    let corrupt_at = Header::MIN_LEN + (written.data_length as usize) / 2;
    encoded[corrupt_at] ^= 0xFF;

    let mut cursor = &encoded[..];
    let block = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(
        &*block.data, PAYLOAD,
        "Reed-Solomon recovery must reconstruct the original \
payload from a single-byte data-shard flip",
    );
    Ok(())
}
/// Writes `PAYLOAD` with `BlockTransform::PlainEcc(EccParams::SECDED)` and reads
/// it back untouched.
///
/// Verifies that the header carries the `ECC_PARITY` flag, that the written
/// length equals `header.on_disk_size_with(Some(EccParams::SECDED))`, and that
/// the decoded data equals `PAYLOAD`.
#[test]
fn block_roundtrip_secded_clean_read() -> crate::Result<()> {
    use crate::table::block::header::block_flags;

    let transform = BlockTransform::PlainEcc(EccParams::SECDED);
    let mut encoded = vec![];
    let written = Block::write_into(
        &mut encoded,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;

    let parity_bit = written.block_flags & block_flags::ECC_PARITY;
    assert!(
        parity_bit != 0,
        "SECDED writer must set the ECC_PARITY flag",
    );
    assert_eq!(
        encoded.len(),
        written.on_disk_size_with(Some(EccParams::SECDED)) as usize,
        "on-disk size must equal header + payload + SECDED parity length",
    );

    let mut cursor = &encoded[..];
    let block = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(&*block.data, PAYLOAD);
    Ok(())
}
/// Writes `PAYLOAD` with SECDED parity, flips the lowest bit of one byte in the
/// middle of the stored payload, and expects the read to return the original
/// bytes.
#[test]
fn block_roundtrip_secded_recovers_from_single_bit_flip() -> crate::Result<()> {
    let transform = BlockTransform::PlainEcc(EccParams::SECDED);
    let mut encoded = vec![];
    let written = Block::write_into(
        &mut encoded,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;

    // NOTE(review): assumes the stored payload begins `Header::MIN_LEN` bytes
    // into the buffer.
    let payload_mid = (written.data_length as usize) / 2;
    encoded[Header::MIN_LEN + payload_mid] ^= 0x01;

    let mut cursor = &encoded[..];
    let block = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(
        &*block.data, PAYLOAD,
        "SECDED must heal a single-bit payload flip",
    );
    Ok(())
}
/// Writes `PAYLOAD` with SECDED parity, flips the two lowest bits of a single
/// payload byte, and expects `Block::from_reader` to fail with
/// `Error::PageEccUnrecoverable`.
///
/// On failure the actual outcome is reported (including the concrete error
/// variant), matching the diagnostic style of the RS(4,2) unrecoverable test.
#[test]
fn block_roundtrip_secded_unrecoverable_on_double_bit_flip() -> crate::Result<()> {
    let mut writer = vec![];
    let header = Block::write_into(
        &mut writer,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &BlockTransform::PlainEcc(EccParams::SECDED),
    )?;

    // Both flipped bits live in the same byte, hence in the same protected word.
    // NOTE(review): assumes the stored payload begins `Header::MIN_LEN` bytes
    // into the buffer.
    let flip_at = Header::MIN_LEN + (header.data_length as usize) / 2;
    writer[flip_at] ^= 0x03;

    let mut reader = &writer[..];
    let result = Block::from_reader(
        &mut reader,
        BlockIdentity::for_test(0, BlockType::Data),
        &BlockTransform::PlainEcc(EccParams::SECDED),
    );

    // A plain `matches!` assertion would hide which error was returned; spell
    // out every outcome so a wrong variant shows up in the failure message.
    match result {
        Err(crate::Error::PageEccUnrecoverable { .. }) => {}
        Ok(_) => panic!(
            "a double-bit error in one word must be detected as unrecoverable, \
but from_reader returned Ok"
        ),
        Err(e) => panic!("expected PageEccUnrecoverable, got {e:?}"),
    }
    Ok(())
}
/// File-backed variant of the RS(4,2) repair test: the block is written via
/// `write_block_to_tempfile`, one payload byte is corrupted on disk, and
/// `Block::from_file` must still return `PAYLOAD`.
#[test]
fn block_from_file_plain_ecc_recovers_from_single_byte_flip() -> crate::Result<()> {
    let transform = BlockTransform::PlainEcc(EccParams::RS_4_2);
    let fixture = super::write_block_to_tempfile(
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    let block_path = fixture.dir.path().join("block");

    // Flip the top bit of the fourth payload byte and write the file back.
    // NOTE(review): assumes the stored payload begins at `Header::MIN_LEN`.
    let mut on_disk = std::fs::read(&block_path)?;
    on_disk[Header::MIN_LEN + 3] ^= 0x80;
    std::fs::write(&block_path, &on_disk)?;

    let corrupted = std::fs::File::open(&block_path)?;
    let block = Block::from_file(
        &corrupted,
        fixture.handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(&*block.data, PAYLOAD);
    Ok(())
}
/// Checks the `EccStatus` returned by `Block::from_file_with_status`:
/// a pristine file yields `EccStatus::Ok`, and the same file after a
/// single-byte corruption yields `EccStatus::Corrected` with the original
/// payload restored.
#[test]
fn from_file_with_status_reports_corrected_after_ecc_repair() -> crate::Result<()> {
    let transform = BlockTransform::PlainEcc(EccParams::RS_4_2);
    let fixture = super::write_block_to_tempfile(
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    let block_path = fixture.dir.path().join("block");

    // Phase 1: clean read. The handle is scoped so it is closed before the
    // file is rewritten below.
    {
        let pristine = std::fs::File::open(&block_path)?;
        let (block, status) = Block::from_file_with_status(
            &pristine,
            fixture.handle,
            BlockIdentity::for_test(0, BlockType::Data),
            &transform,
        )?;
        assert_eq!(&*block.data, PAYLOAD);
        assert_eq!(status, EccStatus::Ok, "clean read must not flag a repair");
    }

    // Phase 2: corrupt one payload byte on disk, then read again.
    // NOTE(review): assumes the stored payload begins at `Header::MIN_LEN`.
    let corrupt_at = Header::MIN_LEN + 3;
    let mut on_disk = std::fs::read(&block_path)?;
    on_disk[corrupt_at] ^= 0x80;
    std::fs::write(&block_path, &on_disk)?;

    let damaged = std::fs::File::open(&block_path)?;
    let (block, status) = Block::from_file_with_status(
        &damaged,
        fixture.handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(&*block.data, PAYLOAD, "repaired bytes must equal original");
    assert_eq!(
        status,
        EccStatus::Corrected,
        "a read that repaired the block must report Corrected",
    );
    Ok(())
}
/// Writes a block with an `EccParams::try_new(8, 2)` scheme and reads it twice
/// through `Block::from_file_with_status`:
///
/// 1. with the matching `PlainEcc` transform, expecting `EccStatus::Ok`;
/// 2. with `BlockTransform::PLAIN`, expecting the payload to still be returned
///    together with `EccStatus::Unrecognized`.
#[test]
fn from_file_with_status_soft_warns_on_unrecognized_trailer() -> crate::Result<()> {
    let scheme = EccParams::try_new(8, 2).expect("valid shards");
    let matching = BlockTransform::PlainEcc(scheme);
    let fixture = super::write_block_to_tempfile(
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &matching,
    )?;

    // Reader configured with the scheme that produced the block.
    let (block, status) = Block::from_file_with_status(
        &fixture.file,
        fixture.handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &matching,
    )?;
    assert_eq!(&*block.data, PAYLOAD);
    assert_eq!(status, EccStatus::Ok);

    // Reader configured without any ECC scheme.
    let (block, status) = Block::from_file_with_status(
        &fixture.file,
        fixture.handle,
        BlockIdentity::for_test(0, BlockType::Data),
        &BlockTransform::PLAIN,
    )?;
    assert_eq!(
        &*block.data, PAYLOAD,
        "payload reads despite unknown trailer"
    );
    assert_eq!(status, EccStatus::Unrecognized);
    Ok(())
}
/// Writes an empty-payload block with an `EccParams::try_new(8, 2)` scheme,
/// appends four stray bytes to the file, widens the block handle by the same
/// four bytes, and expects `Block::from_file_with_status` to fail with
/// `Error::InvalidHeader("Block")`.
#[test]
fn from_file_recognized_empty_block_rejects_extra_trailer() -> crate::Result<()> {
    let scheme = EccParams::try_new(8, 2).expect("valid shards");
    let transform = BlockTransform::PlainEcc(scheme);
    let fixture = super::write_block_to_tempfile(
        b"",
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    let block_path = fixture.dir.path().join("block");
    let original_size = fixture.handle.size();

    // Append four junk bytes after the block as it was written.
    let mut on_disk = std::fs::read(&block_path)?;
    on_disk.extend_from_slice(&[0xAB, 0xCD, 0xEF, 0x01]);
    std::fs::write(&block_path, &on_disk)?;

    // The handle claims the junk bytes as part of the block.
    let widened =
        crate::table::BlockHandle::new(crate::table::BlockOffset(0), original_size + 4);
    let reopened = std::fs::File::open(&block_path)?;
    let err = Block::from_file_with_status(
        &reopened,
        widened,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )
    .err();

    assert!(
        matches!(err, Some(crate::Error::InvalidHeader("Block"))),
        "recognized zero-parity layout + extra trailer must fail, got {err:?}",
    );
    Ok(())
}
/// Writes `PAYLOAD` with `BlockTransform::CompressedEcc` (LZ4 + RS(4,2)),
/// corrupts one byte in the middle of the stored bytes, and expects the read
/// path to return the original payload.
#[cfg(feature = "lz4")]
#[test]
fn block_roundtrip_compressed_ecc_recovers_from_byte_flip() -> crate::Result<()> {
    let write_transform = BlockTransform::CompressedEcc(
        CompressionContext::new(CompressionType::Lz4)?,
        EccParams::RS_4_2,
    );
    let mut encoded = vec![];
    let header = Block::write_into(
        &mut encoded,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &write_transform,
    )?;
    assert!(header.block_flags & crate::table::block::header::block_flags::ECC_PARITY != 0);

    // NOTE(review): assumes the stored (compressed) bytes begin at
    // `Header::MIN_LEN`; the target is halfway through `data_length`.
    let corrupt_at = Header::MIN_LEN + (header.data_length as usize) / 2;
    encoded[corrupt_at] ^= 0x55;

    // The reader gets its own compression context, independent of the writer's.
    let read_transform = BlockTransform::CompressedEcc(
        CompressionContext::new(CompressionType::Lz4)?,
        EccParams::RS_4_2,
    );
    let mut cursor = &encoded[..];
    let block = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &read_transform,
    )?;
    assert_eq!(
        &*block.data, PAYLOAD,
        "ECC must recover the compressed bytes BEFORE lz4 \
decompression, otherwise lz4 would fail on corrupt input",
    );
    Ok(())
}
/// Writes `PAYLOAD` with `BlockTransform::EncryptedEcc` (an `Aes256GcmProvider`
/// plus RS(4,2)), corrupts one byte in the middle of the stored bytes, and
/// expects the read path to return the original payload.
#[cfg(feature = "encryption")]
#[test]
fn block_roundtrip_encrypted_ecc_recovers_from_byte_flip() -> crate::Result<()> {
    let cipher = crate::encryption::Aes256GcmProvider::new(&[0x42; 32]);
    let transform = BlockTransform::EncryptedEcc(&cipher, EccParams::RS_4_2);

    let mut encoded = vec![];
    let header = Block::write_into(
        &mut encoded,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert!(header.block_flags & crate::table::block::header::block_flags::ECC_PARITY != 0);

    // NOTE(review): assumes the stored (encrypted) bytes begin at
    // `Header::MIN_LEN`; the target is halfway through `data_length`.
    let corrupt_at = Header::MIN_LEN + (header.data_length as usize) / 2;
    encoded[corrupt_at] ^= 0x21;

    let mut cursor = &encoded[..];
    let block = Block::from_reader(
        &mut cursor,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    assert_eq!(
        &*block.data, PAYLOAD,
        "ECC must reconstruct ciphertext byte-exactly so AEAD \
authentication succeeds on the recovered bytes",
    );
    Ok(())
}
/// Writes `PAYLOAD` with RS(4,2) parity, corrupts the first byte of three
/// different data shards, and expects `Block::from_reader` to fail with
/// `Error::PageEccUnrecoverable`.
///
/// Each corruption offset is asserted to lie inside the payload region, so a
/// mis-sized fixture fails loudly instead of silently corrupting fewer shards
/// (or touching the parity trailer).
#[test]
fn block_roundtrip_plain_ecc_unrecoverable_when_too_many_shards_corrupt()
-> crate::Result<()> {
    let mut writer = vec![];
    let header = Block::write_into(
        &mut writer,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &BlockTransform::PlainEcc(EccParams::RS_4_2),
    )?;

    // Per-shard size for four data shards: ceil(len / 4), rounded up to an even
    // byte count (the same rounding `expected_parity_len` applies).
    let payload_len = header.data_length as usize;
    let shard_bytes = ((payload_len.div_ceil(4)) + 1) & !1;
    // NOTE(review): assumes the stored payload begins at `Header::MIN_LEN`.
    let payload_start = Header::MIN_LEN;

    for shard_idx in 0..3 {
        let pos = payload_start + shard_idx * shard_bytes;
        assert!(
            pos < payload_start + payload_len,
            "test setup error: shard {shard_idx} offset {pos} is outside the \
{payload_len}-byte payload",
        );
        writer[pos] ^= 0xFF;
    }

    let mut reader = &writer[..];
    let result = Block::from_reader(
        &mut reader,
        BlockIdentity::for_test(0, BlockType::Data),
        &BlockTransform::PlainEcc(EccParams::RS_4_2),
    );
    match result {
        Ok(_) => panic!(
            "3-shard corruption must exceed RS(4,2) recovery capacity, \
but from_reader returned Ok"
        ),
        Err(crate::Error::PageEccUnrecoverable { .. }) => {}
        Err(e) => panic!("expected PageEccUnrecoverable, got {e:?}"),
    }
    Ok(())
}
/// Checks that the `ECC_PARITY` header flag tracks whether parity bytes were
/// actually written, for both an empty and a non-empty payload under
/// `BlockTransform::PlainEcc(EccParams::RS_4_2)`.
///
/// * Empty payload: flag clear, bytes written equal `on_disk_size()`, which in
///   turn equals `Header::MIN_LEN`.
/// * `PAYLOAD`: flag set, bytes written equal `on_disk_size()`, which exceeds
///   `Header::MIN_LEN + data_length`.
#[test]
fn ecc_parity_bit_agrees_with_emitted_parity_length() -> crate::Result<()> {
    use crate::table::block::header::block_flags;

    let transform = BlockTransform::PlainEcc(EccParams::RS_4_2);

    // Case 1: empty payload.
    let mut empty_bytes = vec![];
    let empty_header = Block::write_into(
        &mut empty_bytes,
        &[],
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    let empty_parity_bit = empty_header.block_flags & block_flags::ECC_PARITY;
    let empty_on_disk = empty_header.on_disk_size() as usize;
    assert_eq!(
        empty_parity_bit,
        0,
        "ECC_PARITY must be clear when no parity trailer is emitted",
    );
    assert_eq!(
        empty_bytes.len(),
        empty_on_disk,
        "on-disk size matches the derived (zero) parity length",
    );
    assert_eq!(
        empty_on_disk,
        Header::MIN_LEN,
        "empty payload emits no parity, so on-disk size is just the header",
    );

    // Case 2: non-empty payload.
    let mut full_bytes = vec![];
    let full_header = Block::write_into(
        &mut full_bytes,
        PAYLOAD,
        BlockIdentity::for_test(0, BlockType::Data),
        &transform,
    )?;
    let full_parity_bit = full_header.block_flags & block_flags::ECC_PARITY;
    let full_on_disk = full_header.on_disk_size() as usize;
    assert_ne!(
        full_parity_bit,
        0,
        "ECC_PARITY must be set when a parity trailer is emitted",
    );
    assert_eq!(
        full_bytes.len(),
        full_on_disk,
        "on-disk size matches header + payload + derived parity",
    );
    assert!(
        full_on_disk > Header::MIN_LEN + full_header.data_length as usize,
        "non-empty payload emits a parity trailer beyond header + payload",
    );
    Ok(())
}
}
}