use crate::error::RiegeliError;
use crate::hash::highway_hash_64;
/// One-byte tag identifying the kind of a chunk.
///
/// Each discriminant is the ASCII byte that is packed into the low 8 bits of
/// `ChunkHeader::chunk_type_and_num_records`, so `chunk_type as u8` yields the
/// stored tag directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub(crate) enum ChunkType {
/// File signature chunk, tag `b's'`.
FileSignature = b's',
/// File metadata chunk, tag `b'm'`.
FileMetadata = b'm',
/// Padding chunk, tag `b'p'`.
Padding = b'p',
/// Simple chunk, tag `b'r'`.
Simple = b'r',
/// Transposed chunk, tag `b't'`.
Transposed = b't',
}
impl TryFrom<u8> for ChunkType {
    type Error = RiegeliError;

    /// Decodes a chunk-type tag byte.
    ///
    /// # Errors
    ///
    /// Returns `RiegeliError::UnknownChunkType` carrying the offending byte when
    /// `b` is not the tag of any variant.
    fn try_from(b: u8) -> Result<Self, Self::Error> {
        // Every variant's discriminant *is* its tag byte, so the table only
        // needs to list the variants; the bytes come from the enum itself.
        const KNOWN: [ChunkType; 5] = [
            ChunkType::FileSignature,
            ChunkType::FileMetadata,
            ChunkType::Padding,
            ChunkType::Simple,
            ChunkType::Transposed,
        ];

        KNOWN
            .iter()
            .copied()
            .find(|candidate| *candidate as u8 == b)
            .ok_or(RiegeliError::UnknownChunkType(b))
    }
}
/// In-memory form of a 40-byte chunk header.
///
/// `to_bytes` serializes the five fields, in declaration order, as
/// little-endian `u64` values; `from_bytes` is the inverse.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkHeader {
/// Hash of the other four fields in their serialized form (header bytes 8..40).
pub(crate) header_hash: u64,
/// Length in bytes of the chunk data that accompanies this header.
pub(crate) data_size: u64,
/// Hash of the chunk data.
pub(crate) data_hash: u64,
/// Chunk type tag in the low 8 bits, record count in the upper 56 bits.
pub(crate) chunk_type_and_num_records: u64,
/// Stored verbatim from the caller.
/// NOTE(review): presumably the size of the data once decoded — confirm against the writer.
pub(crate) decoded_data_size: u64,
}
impl ChunkHeader {
    /// Largest record count that fits in the upper 56 bits of
    /// `chunk_type_and_num_records`.
    const MAX_NUM_RECORDS: u64 = (1 << 56) - 1;

    /// Builds a header describing `data`, computing both the data hash and the
    /// header hash.
    ///
    /// * `data` - the chunk data exactly as it will be stored; its length and
    ///   hash are recorded in the header.
    /// * `chunk_type` - tag stored in the low byte of the packed type/count word.
    /// * `num_records` - record count stored in the upper 56 bits of that word.
    /// * `decoded_data_size` - stored verbatim.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if `num_records` does not fit in 56 bits. In
    /// release builds the excess high bits are discarded by the shift, so the
    /// resulting header would report a different record count than requested.
    pub fn from_parts(
        data: &[u8],
        chunk_type: ChunkType,
        num_records: u64,
        decoded_data_size: u64,
    ) -> Self {
        debug_assert!(
            num_records <= Self::MAX_NUM_RECORDS,
            "num_records {num_records} does not fit in 56 bits"
        );

        let mut header = Self {
            // Placeholder: the real value depends on the other four fields.
            header_hash: 0,
            data_size: data.len() as u64,
            data_hash: highway_hash_64(data),
            chunk_type_and_num_records: (num_records << 8) | (chunk_type as u64),
            decoded_data_size,
        };
        header.header_hash = header.compute_header_hash();
        header
    }

    /// Parses a serialized header: five consecutive little-endian `u64` values.
    ///
    /// No validation is performed; use [`Self::is_header_valid`] and
    /// [`Self::chunk_type`] to check the result.
    pub fn from_bytes(bytes: [u8; 40]) -> Self {
        // Reads the `index`-th 8-byte word. All indices used below are < 5, so
        // the slice is always in bounds and exactly 8 bytes long.
        let word = |index: usize| -> u64 {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(&bytes[index * 8..(index + 1) * 8]);
            u64::from_le_bytes(raw)
        };

        Self {
            header_hash: word(0),
            data_size: word(1),
            data_hash: word(2),
            chunk_type_and_num_records: word(3),
            decoded_data_size: word(4),
        }
    }

    /// Serializes the header: the stored header hash followed by the four
    /// hashed fields, all little-endian.
    pub fn to_bytes(self) -> [u8; 40] {
        let mut bytes = [0u8; 40];
        bytes[0..8].copy_from_slice(&self.header_hash.to_le_bytes());
        bytes[8..40].copy_from_slice(&self.hashed_fields());
        bytes
    }

    /// Length in bytes of the chunk data described by this header.
    pub fn data_size(&self) -> u64 {
        self.data_size
    }

    /// Hash of the chunk data as stored in the header.
    // NOTE(review): the lint allowance suggests no in-crate caller yet; confirm
    // before removing either the accessor or the attribute.
    #[allow(dead_code)]
    pub fn data_hash(&self) -> u64 {
        self.data_hash
    }

    /// Decodes the chunk type from the low byte of the packed type/count word.
    ///
    /// # Errors
    ///
    /// Returns `RiegeliError::MalformedData` if the byte is not a known tag.
    pub fn chunk_type(&self) -> Result<ChunkType, RiegeliError> {
        let byte = (self.chunk_type_and_num_records & 0xff) as u8;
        // The tag table lives in `TryFrom<u8>`; only the error is remapped so
        // callers of this method keep seeing `MalformedData`.
        ChunkType::try_from(byte).map_err(|_| {
            RiegeliError::MalformedData(format!("unknown chunk type byte: {byte:#04x}"))
        })
    }

    /// Record count stored in the upper 56 bits of the packed type/count word.
    pub fn num_records(&self) -> u64 {
        self.chunk_type_and_num_records >> 8
    }

    /// The `decoded_data_size` field as stored.
    pub fn decoded_data_size(&self) -> u64 {
        self.decoded_data_size
    }

    /// The header hash as stored, without checking it against the fields.
    // NOTE(review): the lint allowance suggests no in-crate caller yet; confirm
    // before removing either the accessor or the attribute.
    #[allow(dead_code)]
    pub fn stored_hash(&self) -> u64 {
        self.header_hash
    }

    /// Returns `true` when the stored header hash matches the hash recomputed
    /// from the other four fields.
    pub fn is_header_valid(&self) -> bool {
        self.header_hash == self.compute_header_hash()
    }

    /// Returns `true` when `data` hashes to the data hash stored in the header.
    pub fn is_data_valid(&self, data: &[u8]) -> bool {
        self.data_hash == highway_hash_64(data)
    }

    /// Serializes the four fields covered by the header hash (everything except
    /// `header_hash` itself) in their on-disk order.
    fn hashed_fields(&self) -> [u8; 32] {
        let mut body = [0u8; 32];
        body[0..8].copy_from_slice(&self.data_size.to_le_bytes());
        body[8..16].copy_from_slice(&self.data_hash.to_le_bytes());
        body[16..24].copy_from_slice(&self.chunk_type_and_num_records.to_le_bytes());
        body[24..32].copy_from_slice(&self.decoded_data_size.to_le_bytes());
        body
    }

    /// Hashes the serialized fields; the single source of truth for both
    /// creating and validating `header_hash`.
    fn compute_header_hash(&self) -> u64 {
        highway_hash_64(&self.hashed_fields())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Seek, SeekFrom, Write};

    /// Size in bytes of a serialized chunk header.
    const HEADER_LEN: usize = 40;

    /// Append-only in-memory sink used to capture what `RecordWriter` emits.
    struct VecWriter {
        data: Vec<u8>,
    }

    impl Write for VecWriter {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            self.data.extend_from_slice(buf);
            Ok(buf.len())
        }

        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    impl Seek for VecWriter {
        /// Ignores the requested position and reports the current length.
        fn seek(&mut self, _: SeekFrom) -> std::io::Result<u64> {
            Ok(self.data.len() as u64)
        }
    }

    /// A one-record `Simple` header together with the data it describes.
    fn make_header() -> (ChunkHeader, Vec<u8>) {
        let data = b"hello, riegeli!".to_vec();
        let h = ChunkHeader::from_parts(&data, ChunkType::Simple, 1, data.len() as u64);
        (h, data)
    }

    #[test]
    fn from_parts_header_valid() {
        let (h, _) = make_header();
        assert!(h.is_header_valid());
    }

    #[test]
    fn from_parts_data_valid() {
        let (h, data) = make_header();
        assert!(h.is_data_valid(&data));
    }

    #[test]
    fn pack_unpack_chunk_type_and_num_records() {
        let data = b"test";
        let chunk_type = ChunkType::Simple;
        let num_records = 42u64;
        let h = ChunkHeader::from_parts(data, chunk_type, num_records, data.len() as u64);
        assert_eq!(h.chunk_type().unwrap(), chunk_type);
        assert_eq!(h.num_records(), num_records);
    }

    #[test]
    fn pack_unpack_large_num_records() {
        let data = b"";
        let num_records = (1u64 << 48) - 1;
        let h = ChunkHeader::from_parts(data, ChunkType::FileSignature, num_records, 0);
        assert_eq!(h.num_records(), num_records);
        assert_eq!(h.chunk_type().unwrap(), ChunkType::FileSignature);
    }

    #[test]
    fn round_trip() {
        let (original, _) = make_header();
        let bytes = original.to_bytes();
        let restored = ChunkHeader::from_bytes(bytes);
        assert_eq!(original, restored);
        assert!(restored.is_header_valid());
    }

    #[test]
    fn corrupted_header_hash_is_detected() {
        let (original, _) = make_header();
        let mut bytes = original.to_bytes();
        // Flip one bit of the stored hash; the hashed fields are untouched, so
        // the recomputed hash can no longer match.
        bytes[0] ^= 0x01;
        assert!(!ChunkHeader::from_bytes(bytes).is_header_valid());
    }

    #[test]
    fn accessors() {
        let data = b"some data";
        let h = ChunkHeader::from_parts(data, ChunkType::Simple, 5, 100);
        assert_eq!(h.data_size(), data.len() as u64);
        assert_eq!(h.decoded_data_size(), 100);
        assert_eq!(h.num_records(), 5);
    }

    #[test]
    fn chunk_type_try_from_unknown_returns_err() {
        use crate::error::RiegeliError;

        let unknown_bytes: &[u8] = &[0x00, 0x01, 0x7f, 0xfe, 0xff, b'x', b'a'];
        for &b in unknown_bytes {
            let result = ChunkType::try_from(b);
            assert!(
                matches!(result, Err(RiegeliError::UnknownChunkType(_))),
                "expected UnknownChunkType for byte {b:#04x}, got {result:?}"
            );
        }
    }

    #[test]
    fn chunk_type_unknown_byte_is_malformed_data() {
        use crate::error::RiegeliError;

        // An all-zero header carries tag byte 0x00, which is not a known type.
        let h = ChunkHeader::from_bytes([0u8; 40]);
        assert!(matches!(
            h.chunk_type(),
            Err(RiegeliError::MalformedData(_))
        ));
    }

    #[test]
    fn chunk_type_file_metadata_recognized() {
        let h = ChunkHeader::from_parts(&[], ChunkType::FileMetadata, 0, 0);
        assert_eq!(h.chunk_type().unwrap(), ChunkType::FileMetadata);
    }

    #[test]
    fn criterion_2_metadata_chunk_at_offset_64() {
        use crate::constants::{BLOCK_HEADER_SIZE, CHUNK_HEADER_SIZE};
        use crate::record_writer::{RecordWriter, WriterOptions};

        let payload = b"schema-v1".to_vec();
        let mut w = VecWriter { data: Vec::new() };
        {
            let mut writer = RecordWriter::new(
                &mut w,
                WriterOptions::new().set_serialized_metadata(payload.clone()),
            )
            .expect("writer");
            writer.write_record(b"hello").expect("write");
            writer.close().expect("close");
        }
        let data = w.data;

        // The metadata chunk header is expected one block header plus one chunk
        // header into the file (the offset named in the test and its message).
        let metadata_chunk_pos = (BLOCK_HEADER_SIZE + CHUNK_HEADER_SIZE) as usize;
        let data_start = metadata_chunk_pos + HEADER_LEN;
        assert!(
            data.len() > data_start,
            "file too short to contain metadata chunk"
        );

        let ch_bytes: [u8; 40] = data[metadata_chunk_pos..data_start].try_into().unwrap();
        let ch = ChunkHeader::from_bytes(ch_bytes);
        assert!(ch.is_header_valid(), "chunk header hash invalid");
        assert_eq!(
            ch.chunk_type().unwrap(),
            ChunkType::FileMetadata,
            "expected FileMetadata chunk at offset 64"
        );

        let data_end = data_start + ch.data_size() as usize;
        assert!(data.len() >= data_end, "file truncated");
        assert_eq!(&data[data_start..data_end], payload.as_slice());
    }

    #[test]
    fn criterion_8_check_file_format_does_not_decompress() {
        use crate::hash::highway_hash_64;
        use crate::record_reader::{ReaderOptions, RecordReader};
        use crate::record_writer::{RecordWriter, WriterOptions};

        let mut w = VecWriter { data: Vec::new() };
        {
            let mut writer = RecordWriter::new(&mut w, WriterOptions::new()).expect("writer");
            writer.write_record(b"hello").expect("write");
            writer.write_record(b"world").expect("write");
            writer.close().expect("close");
        }
        let mut tampered = w.data;

        // Locate the chunk whose header starts at byte 64 and find its data.
        let ch_start = 64usize;
        let ch_bytes: [u8; 40] = tampered[ch_start..ch_start + HEADER_LEN]
            .try_into()
            .unwrap();
        let ch = ChunkHeader::from_bytes(ch_bytes);
        let data_size = ch.data_size() as usize;
        let data_start = ch_start + HEADER_LEN;

        // Overwrite the chunk data with deterministic garbage of the same length.
        let garbage: Vec<u8> = (0..data_size)
            .map(|i| (i as u8).wrapping_mul(17) ^ 0xAB)
            .collect();
        tampered[data_start..data_start + data_size].copy_from_slice(&garbage);

        // Re-seal the header: first the data hash, then the header hash over the
        // updated 32 hashed bytes, so both hashes are valid for the garbage.
        let new_data_hash = highway_hash_64(&garbage);
        tampered[ch_start + 16..ch_start + 24].copy_from_slice(&new_data_hash.to_le_bytes());
        let header_body: [u8; 32] = tampered[ch_start + 8..ch_start + HEADER_LEN]
            .try_into()
            .unwrap();
        let new_header_hash = highway_hash_64(&header_body);
        tampered[ch_start..ch_start + 8].copy_from_slice(&new_header_hash.to_le_bytes());

        let mut reader =
            RecordReader::new(Cursor::new(tampered), ReaderOptions::new()).expect("reader");
        reader
            .check_file_format()
            .expect("check_file_format should succeed even with garbage-but-hash-valid data");
    }
}