use std::io;
use log::debug;
use crate::store::IndexOutput;
use crate::store::checksum::CRC32;
/// Total size in bytes of the trailing footer: a 4-byte magic, a 4-byte
/// algorithm id, and an 8-byte checksum field.
pub(crate) const FOOTER_LENGTH: usize = 4 + 4 + 8;

/// Marker expected in the first four bytes of the footer: the bitwise
/// complement of `0x3FD76C17`, i.e. `0xC02893E8` when viewed as unsigned.
pub(crate) const FOOTER_MAGIC: i32 = !0x3FD7_6C17_i32;
/// Appends the footer to `out`: the magic, an algorithm id of `0`, and the
/// checksum.
///
/// The checksum is taken from `out.checksum()` only after the magic and the
/// algorithm id have been written, and is then emitted as a 64-bit value.
///
/// # Errors
///
/// Propagates any error returned by the underlying writes.
pub(crate) fn write_footer(out: &mut dyn IndexOutput) -> io::Result<()> {
    // The only algorithm id the readers in this file accept.
    const ALGORITHM_ID: i32 = 0;

    out.write_be_int(FOOTER_MAGIC)?;
    out.write_be_int(ALGORITHM_ID)?;

    let checksum = out.checksum();
    debug!("write_footer: checksum=0x{checksum:08x}");
    out.write_be_long(checksum as i64)?;
    Ok(())
}
/// Validates the footer at the end of `bytes` and checks the stored checksum
/// against a freshly computed one.
///
/// The footer is read as a big-endian `i32` magic, a big-endian `i32`
/// algorithm id, and a big-endian `i64` checksum. The checksum is recomputed
/// with [`CRC32`] over every byte except the trailing 8-byte checksum field,
/// so the magic and the algorithm id are covered by it.
///
/// # Errors
///
/// Returns an [`io::Error`] when `bytes` is shorter than [`FOOTER_LENGTH`],
/// the magic differs from [`FOOTER_MAGIC`], the algorithm id is not `0`, or
/// the recomputed checksum differs from the stored one.
pub(crate) fn verify_checksum(bytes: &[u8]) -> io::Result<()> {
    // `checked_sub` yields `None` exactly when the input cannot hold a footer.
    let Some(footer_start) = bytes.len().checked_sub(FOOTER_LENGTH) else {
        return Err(io::Error::other(format!(
            "file too short for footer: {} < {FOOTER_LENGTH}",
            bytes.len()
        )));
    };
    let footer = &bytes[footer_start..];

    let magic = read_be_i32(&footer[..4]);
    if magic != FOOTER_MAGIC {
        return Err(io::Error::other(format!(
            "footer magic mismatch: expected 0x{:08X}, got 0x{:08X}",
            FOOTER_MAGIC as u32, magic as u32,
        )));
    }

    let algorithm_id = read_be_i32(&footer[4..8]);
    if algorithm_id != 0 {
        return Err(io::Error::other(format!(
            "unsupported checksum algorithm: {algorithm_id}"
        )));
    }

    let stored_crc = read_be_i64(&footer[8..16]) as u64;

    // Hash everything up to, but not including, the stored checksum itself.
    let checksum_offset = bytes.len() - 8;
    let mut digest = CRC32::new();
    digest.update(&bytes[..checksum_offset]);
    let computed = digest.value();

    if computed == stored_crc {
        Ok(())
    } else {
        Err(io::Error::other(format!(
            "checksum mismatch: stored=0x{stored_crc:016X}, computed=0x{computed:016X}",
        )))
    }
}
/// Reads the checksum stored in the footer of `bytes` without recomputing it.
///
/// Only the footer structure is validated; the bytes that precede the footer
/// are never hashed here.
///
/// # Errors
///
/// Returns an [`io::Error`] when `bytes` is shorter than [`FOOTER_LENGTH`],
/// the magic differs from [`FOOTER_MAGIC`], the algorithm id is not `0`, or
/// the stored value has any of its upper 32 bits set.
pub(crate) fn retrieve_checksum(bytes: &[u8]) -> io::Result<i64> {
    // `checked_sub` yields `None` exactly when the input cannot hold a footer.
    let Some(footer_start) = bytes.len().checked_sub(FOOTER_LENGTH) else {
        return Err(io::Error::other(format!(
            "misplaced codec footer (file truncated?): length={} but footerLength=={FOOTER_LENGTH}",
            bytes.len()
        )));
    };
    let footer = &bytes[footer_start..];

    let magic = read_be_i32(&footer[..4]);
    if magic != FOOTER_MAGIC {
        return Err(io::Error::other(format!(
            "codec footer mismatch (file truncated?): actual footer=0x{:08X} vs expected footer=0x{:08X}",
            magic as u32, FOOTER_MAGIC as u32,
        )));
    }

    let algorithm_id = read_be_i32(&footer[4..8]);
    if algorithm_id != 0 {
        return Err(io::Error::other(format!(
            "codec footer mismatch: unknown algorithmID: {algorithm_id}"
        )));
    }

    let checksum = read_be_i64(&footer[8..16]);
    // A 32-bit CRC leaves the upper half of the 64-bit field zero.
    if (checksum as u64) >> 32 != 0 {
        return Err(io::Error::other(format!(
            "illegal CRC-32 checksum: {checksum}"
        )));
    }
    Ok(checksum)
}
/// Like [`retrieve_checksum`], but first requires `bytes` to be exactly
/// `expected_length` bytes long.
///
/// # Errors
///
/// Returns an [`io::Error`] when `expected_length` is smaller than
/// [`FOOTER_LENGTH`], when `bytes` is shorter or longer than
/// `expected_length`, or when [`retrieve_checksum`] rejects the footer.
pub(crate) fn retrieve_checksum_with_length(
    bytes: &[u8],
    expected_length: i64,
) -> io::Result<i64> {
    use std::cmp::Ordering;

    if expected_length < FOOTER_LENGTH as i64 {
        return Err(io::Error::other(
            "expectedLength cannot be less than the footer length",
        ));
    }

    let actual = bytes.len() as i64;
    match actual.cmp(&expected_length) {
        Ordering::Less => Err(io::Error::other(format!(
            "truncated file: length={actual} but expectedLength=={expected_length}"
        ))),
        Ordering::Greater => Err(io::Error::other(format!(
            "file too long: length={actual} but expectedLength=={expected_length}"
        ))),
        Ordering::Equal => retrieve_checksum(bytes),
    }
}
/// Decodes a big-endian `i32` from `bytes`.
///
/// # Panics
///
/// Panics if `bytes` is not exactly four bytes long.
fn read_be_i32(bytes: &[u8]) -> i32 {
    let raw = <[u8; 4]>::try_from(bytes).expect("slice of length 4");
    i32::from_be_bytes(raw)
}
/// Decodes a big-endian `i64` from `bytes`.
///
/// # Panics
///
/// Panics if `bytes` is not exactly eight bytes long.
fn read_be_i64(bytes: &[u8]) -> i64 {
    let raw = <[u8; 8]>::try_from(bytes).expect("slice of length 8");
    i64::from_be_bytes(raw)
}
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;
    use crate::store::DataOutput;
    use crate::store::memory::MemoryIndexOutput;

    /// Returns `data` followed by a well-formed footer whose checksum covers
    /// `data`, the magic, and the algorithm id.
    fn with_valid_footer(data: &[u8]) -> Vec<u8> {
        let mut buf = Vec::with_capacity(data.len() + FOOTER_LENGTH);
        buf.extend_from_slice(data);
        buf.extend_from_slice(&FOOTER_MAGIC.to_be_bytes());
        buf.extend_from_slice(&0i32.to_be_bytes());
        let mut crc = CRC32::new();
        crc.update(&buf);
        buf.extend_from_slice(&(crc.value() as i64).to_be_bytes());
        buf
    }

    // --- verify_checksum ---

    #[test]
    fn valid_footer_over_nonempty_data() {
        let bytes = with_valid_footer(b"hello world");
        verify_checksum(&bytes).unwrap();
    }

    #[test]
    fn valid_footer_over_empty_data() {
        let bytes = with_valid_footer(b"");
        assert_len_eq_x!(&bytes, FOOTER_LENGTH);
        verify_checksum(&bytes).unwrap();
    }

    #[test]
    fn too_short_errors() {
        assert_err!(verify_checksum(&[]));
        assert_err!(verify_checksum(&[0u8; 15]));
    }

    #[test]
    fn bad_magic_errors() {
        let mut bytes = with_valid_footer(b"hello");
        let footer_start = bytes.len() - FOOTER_LENGTH;
        bytes[footer_start] ^= 0xFF;
        assert_err!(verify_checksum(&bytes));
    }

    #[test]
    fn bad_algorithm_errors() {
        let mut bytes = with_valid_footer(b"hello");
        let footer_start = bytes.len() - FOOTER_LENGTH;
        bytes[footer_start + 4] = 0xFF;
        assert_err!(verify_checksum(&bytes));
    }

    #[test]
    fn wrong_stored_crc_errors() {
        let mut bytes = with_valid_footer(b"hello");
        let last = bytes.len() - 1;
        bytes[last] ^= 0xFF;
        assert_err!(verify_checksum(&bytes));
    }

    #[test]
    fn corrupted_data_errors() {
        let mut bytes = with_valid_footer(b"hello");
        bytes[0] ^= 0xFF;
        assert_err!(verify_checksum(&bytes));
    }

    // --- retrieve_checksum ---

    #[test]
    fn retrieve_checksum_returns_stored_value() {
        let bytes = with_valid_footer(b"hello world");
        let mut crc = CRC32::new();
        crc.update(&bytes[..bytes.len() - 8]);
        let expected = crc.value() as i64;
        assert_eq!(retrieve_checksum(&bytes).unwrap(), expected);
    }

    #[test]
    fn retrieve_checksum_too_short_errors() {
        assert_err!(retrieve_checksum(&[]));
        assert_err!(retrieve_checksum(&[0u8; 15]));
    }

    #[test]
    fn retrieve_checksum_bad_magic_errors() {
        let mut bytes = with_valid_footer(b"hello");
        let footer_start = bytes.len() - FOOTER_LENGTH;
        bytes[footer_start] ^= 0xFF;
        assert_err!(retrieve_checksum(&bytes));
    }

    #[test]
    fn retrieve_checksum_bad_algorithm_errors() {
        let mut bytes = with_valid_footer(b"hello");
        let footer_start = bytes.len() - FOOTER_LENGTH;
        bytes[footer_start + 4] = 0xFF;
        assert_err!(retrieve_checksum(&bytes));
    }

    #[test]
    fn retrieve_checksum_does_not_verify_crc() {
        let mut bytes = with_valid_footer(b"hello");
        bytes[0] ^= 0xFF;
        assert_ok!(retrieve_checksum(&bytes));
    }

    /// A checksum field with any of its upper 32 bits set must be rejected.
    #[test]
    fn retrieve_checksum_illegal_crc_errors() {
        let mut bytes = with_valid_footer(b"hello");
        // First byte of the 8-byte big-endian checksum field is its most
        // significant byte; making it non-zero sets a bit above bit 31.
        let crc_start = bytes.len() - 8;
        bytes[crc_start] = 0x01;
        assert_err!(retrieve_checksum(&bytes));
    }

    // --- retrieve_checksum_with_length ---

    #[test]
    fn retrieve_checksum_with_length_matches() {
        let bytes = with_valid_footer(b"hello world");
        assert_ok!(retrieve_checksum_with_length(&bytes, bytes.len() as i64));
    }

    #[test]
    fn retrieve_checksum_with_length_too_short() {
        let bytes = with_valid_footer(b"hello world");
        assert_err!(retrieve_checksum_with_length(
            &bytes,
            bytes.len() as i64 + 1
        ));
    }

    #[test]
    fn retrieve_checksum_with_length_too_long() {
        let bytes = with_valid_footer(b"hello world");
        assert_err!(retrieve_checksum_with_length(
            &bytes,
            bytes.len() as i64 - 1
        ));
    }

    #[test]
    fn retrieve_checksum_with_length_below_footer_errors() {
        let bytes = with_valid_footer(b"hello");
        assert_err!(retrieve_checksum_with_length(&bytes, 8));
    }

    // --- write_footer ---

    #[test]
    fn test_write_footer() {
        let mut out = MemoryIndexOutput::new("test".to_string());
        out.write_all(b"hello").unwrap();
        write_footer(&mut out).unwrap();
        let bytes = out.bytes();
        // 5 payload bytes + 16 footer bytes.
        assert_len_eq_x!(&bytes, 21);
        let footer_start = 5;
        // FOOTER_MAGIC in big-endian byte order.
        assert_eq!(bytes[footer_start], 0xc0);
        assert_eq!(bytes[footer_start + 1], 0x28);
        assert_eq!(bytes[footer_start + 2], 0x93);
        assert_eq!(bytes[footer_start + 3], 0xe8);
        // Algorithm id, then the upper four bytes of the checksum field.
        assert_eq!(&bytes[footer_start + 4..footer_start + 8], &[0, 0, 0, 0]);
        assert_eq!(&bytes[footer_start + 8..footer_start + 12], &[0, 0, 0, 0]);
    }

    #[test]
    fn test_footer_covers_preceding_bytes() {
        let mut out = MemoryIndexOutput::new("test".to_string());
        out.write_all(b"test data").unwrap();
        // Reference checksum: same payload plus magic and algorithm id,
        // sampled before any checksum bytes are written.
        let checksum_before_crc = {
            let mut out2 = MemoryIndexOutput::new("test2".to_string());
            out2.write_all(b"test data").unwrap();
            out2.write_be_int(FOOTER_MAGIC).unwrap();
            out2.write_be_int(0).unwrap();
            out2.checksum()
        };
        write_footer(&mut out).unwrap();
        let bytes = out.bytes();
        let footer_crc_offset = bytes.len() - 8;
        let written_crc = u64::from_be_bytes(
            bytes[footer_crc_offset..footer_crc_offset + 8]
                .try_into()
                .unwrap(),
        );
        assert_eq!(written_crc, checksum_before_crc);
    }
}