use crate::error::{Result, TensogramError};
use crate::wire::{FRAME_HEADER_SIZE, FrameFlags, FrameType, footer_size_for};
/// Name of the hash algorithm this module recognises.
///
/// [`parse_hash_name`] accepts exactly this spelling (alongside `"none"`) and
/// quotes it in its "unknown hash type" error message.
pub const HASH_ALGORITHM_NAME: &str = "xxh3";
/// Translates an optional hash-algorithm name into an "is hashing on?" flag.
///
/// * `None` — nothing was requested; hashing defaults to on (`Ok(true)`).
/// * `Some("xxh3")` (i.e. [`HASH_ALGORITHM_NAME`]) — `Ok(true)`.
/// * `Some("none")` — `Ok(false)`.
///
/// # Errors
///
/// Any other name yields [`TensogramError::Metadata`] whose message quotes
/// the rejected name and lists the accepted spellings. The comparison is
/// exact, so differently-cased variants such as `"XXH3"` are rejected too.
pub fn parse_hash_name(name: Option<&str>) -> Result<bool> {
    // Absent name: fall back to the default, which is "hashing enabled".
    let Some(other) = name else {
        return Ok(true);
    };

    match other {
        HASH_ALGORITHM_NAME => Ok(true),
        "none" => Ok(false),
        _ => Err(TensogramError::Metadata(format!(
            "unknown hash type: {other}; expected \"{HASH_ALGORITHM_NAME}\" or \"none\""
        ))),
    }
}
/// Hashes `data` with `xxh3_64` and renders the 64-bit digest as text.
///
/// The returned string is produced by [`format_xxh3_digest`], i.e. 16
/// zero-padded lowercase hexadecimal characters.
pub fn compute_hash(data: &[u8]) -> String {
    let digest = xxhash_rust::xxh3::xxh3_64(data);
    format_xxh3_digest(digest)
}
/// Computes the `xxh3_64` digest of a frame's body.
///
/// The body is everything between the fixed-size header
/// (`FRAME_HEADER_SIZE` bytes) and the footer, whose length is looked up
/// with `footer_size_for(frame_type)`.
///
/// # Errors
///
/// Returns [`TensogramError::Framing`] when `frame_bytes` is shorter than
/// header + footer, since no body range can be carved out of such a buffer.
pub fn hash_frame_body(frame_bytes: &[u8], frame_type: FrameType) -> Result<u64> {
    let total_len = frame_bytes.len();
    let footer_len = footer_size_for(frame_type);
    let required = FRAME_HEADER_SIZE + footer_len;

    if total_len >= required {
        // The length check above guarantees this subtraction cannot underflow
        // and that the resulting range is valid (possibly empty).
        let body_end = total_len - footer_len;
        return Ok(xxhash_rust::xxh3::xxh3_64(
            &frame_bytes[FRAME_HEADER_SIZE..body_end],
        ));
    }

    Err(TensogramError::Framing(format!(
        "frame too small to hash: frame_bytes.len() = {} < header({}) + footer({}) = {}; \
         for frame_type = {:?}. Likely truncated; re-read from source.",
        total_len, FRAME_HEADER_SIZE, footer_len, required, frame_type,
    )))
}
/// Inspects a frame's inline hash slot and, when a hash is present, checks it.
///
/// The `HASH_PRESENT` bit of the header flags (read with `read_u16_be` at
/// byte offset 6) is the only thing that decides whether the slot is
/// meaningful; the slot's contents are ignored when the bit is clear.
///
/// # Returns
///
/// * `Ok(false)` — the frame carries no hash (flag clear).
/// * `Ok(true)` — the flag is set and the stored digest equals the digest of
///   the frame body as computed by [`hash_frame_body`].
///
/// # Errors
///
/// * [`TensogramError::Framing`] — the buffer is shorter than header + common
///   footer, or it does not end with the `FRAME_END` marker.
/// * [`TensogramError::HashMismatch`] — the flag is set but the stored digest
///   (`expected`) differs from the recomputed one (`actual`); `object_index`
///   is always `None` here.
/// * Any error returned by [`hash_frame_body`] is propagated unchanged.
pub fn check_frame_hash(frame_bytes: &[u8], frame_type: FrameType) -> Result<bool> {
    use crate::wire::{FRAME_COMMON_FOOTER_SIZE, FRAME_END, read_u16_be, read_u64_be};

    let frame_len = frame_bytes.len();

    // Structural sanity first: we need a full header plus the common footer
    // before any fixed-offset read below is meaningful.
    if frame_len < FRAME_HEADER_SIZE + FRAME_COMMON_FOOTER_SIZE {
        return Err(TensogramError::Framing(format!(
            "frame too small to read hash slot: frame_bytes.len() = {frame_len} \
             < header({FRAME_HEADER_SIZE}) + common footer ({FRAME_COMMON_FOOTER_SIZE}); \
             truncated or not a v3 frame"
        )));
    }

    // The frame must be terminated by the end marker.
    if !frame_bytes.ends_with(FRAME_END) {
        return Err(TensogramError::Framing(
            "frame missing ENDF marker while inspecting inline hash — \
             likely truncated or not a v3 frame; re-read from source"
                .to_string(),
        ));
    }

    // Without the flag there is nothing to verify, whatever the slot holds.
    let hash_present = (read_u16_be(frame_bytes, 6) & FrameFlags::HASH_PRESENT) != 0;
    if !hash_present {
        return Ok(false);
    }

    // The digest is read from the first bytes of the common footer.
    let stored = read_u64_be(frame_bytes, frame_len - FRAME_COMMON_FOOTER_SIZE);
    let computed = hash_frame_body(frame_bytes, frame_type)?;

    if stored == computed {
        Ok(true)
    } else {
        Err(TensogramError::HashMismatch {
            object_index: None,
            expected: format_xxh3_digest(stored),
            actual: format_xxh3_digest(computed),
        })
    }
}
/// Strict variant of [`check_frame_hash`]: a frame without a hash is an error.
///
/// # Parameters
///
/// * `frame_bytes` — the complete frame, header through end marker.
/// * `frame_type` — forwarded to [`check_frame_hash`] to locate the body.
/// * `object_index` — index of the object this frame belongs to, if the
///   caller knows it; used purely to enrich the returned errors.
///
/// # Errors
///
/// * [`TensogramError::MissingHash`] — the frame's `HASH_PRESENT` flag is
///   clear. `MissingHash` carries a plain index, so an unknown
///   `object_index` is reported as `0`.
/// * [`TensogramError::HashMismatch`] — stored and recomputed digests differ.
///   The error is re-tagged with the caller's `object_index` so the failing
///   object can be identified (it stays `None` when the caller passed `None`).
/// * Every other error from [`check_frame_hash`] is returned unchanged.
pub fn verify_frame_hash(
    frame_bytes: &[u8],
    frame_type: FrameType,
    object_index: Option<usize>,
) -> Result<()> {
    match check_frame_hash(frame_bytes, frame_type) {
        Ok(true) => Ok(()),
        Ok(false) => Err(TensogramError::MissingHash {
            object_index: object_index.unwrap_or(0),
        }),
        // `check_frame_hash` has no notion of object indices and always
        // reports `None`; attach the index we were given instead of
        // silently dropping it.
        Err(TensogramError::HashMismatch {
            expected, actual, ..
        }) => Err(TensogramError::HashMismatch {
            object_index,
            expected,
            actual,
        }),
        Err(other) => Err(other),
    }
}
/// Renders a 64-bit digest as exactly 16 lowercase hexadecimal characters,
/// left-padded with zeros.
#[inline]
pub(crate) fn format_xxh3_digest(digest: u64) -> String {
    format!("{:016x}", digest)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same bytes twice gives the same 16-character digest.
    #[test]
    fn test_xxh3_compute_hash_is_deterministic() {
        let input = b"hello world";
        let first = compute_hash(input);
        assert_eq!(first.len(), 16);
        assert_eq!(first, compute_hash(input));
    }

    /// No name at all means hashing is enabled.
    #[test]
    fn parse_hash_name_default_is_on() {
        let enabled = parse_hash_name(None).unwrap();
        assert!(enabled);
    }

    /// Both the literal spelling and the exported constant are accepted.
    #[test]
    fn parse_hash_name_accepts_xxh3() {
        for name in ["xxh3", HASH_ALGORITHM_NAME] {
            assert!(parse_hash_name(Some(name)).unwrap());
        }
    }

    /// `"none"` switches hashing off.
    #[test]
    fn parse_hash_name_accepts_none() {
        let enabled = parse_hash_name(Some("none")).unwrap();
        assert!(!enabled);
    }

    /// Unknown names fail with a message naming the offender and the options.
    #[test]
    fn parse_hash_name_rejects_unknown() {
        let msg = match parse_hash_name(Some("sha256")).unwrap_err() {
            TensogramError::Metadata(msg) => msg,
            other => panic!("expected Metadata error, got: {other:?}"),
        };
        assert!(msg.contains("sha256"), "msg: {msg}");
        assert!(msg.contains("xxh3"), "msg: {msg}");
        assert!(msg.contains("none"), "msg: {msg}");
    }

    /// Name matching is case-sensitive.
    #[test]
    fn parse_hash_name_rejects_uppercase() {
        assert!(matches!(
            parse_hash_name(Some("XXH3")).unwrap_err(),
            TensogramError::Metadata(_)
        ));
    }

    /// A 30-byte buffer is too short to hold an `NTensorFrame` body.
    #[test]
    fn hash_frame_body_rejects_below_minimum_size() {
        let too_short = vec![0u8; 30];
        let msg = hash_frame_body(&too_short, FrameType::NTensorFrame)
            .unwrap_err()
            .to_string();
        assert!(msg.contains("frame too small to hash"));
        assert!(msg.contains("frame_bytes.len() = 30"));
        assert!(msg.contains("NTensorFrame"));
    }

    /// A 10-byte buffer cannot contain header + common footer.
    #[test]
    fn check_frame_hash_rejects_below_minimum_size() {
        let too_short = vec![0u8; 10];
        let msg = check_frame_hash(&too_short, FrameType::HeaderMetadata)
            .unwrap_err()
            .to_string();
        assert!(msg.contains("frame too small to read hash slot"));
    }

    /// A buffer of the right size whose last four bytes are not the end
    /// marker is rejected.
    #[test]
    fn check_frame_hash_rejects_missing_endf() {
        use crate::wire::FRAME_HEADER_SIZE;

        let mut buf = vec![0u8; FRAME_HEADER_SIZE + 12];
        // Overwrite the final four bytes, where the end marker would sit.
        let marker_start = buf.len() - 4;
        buf[marker_start..].copy_from_slice(b"XXXX");

        let msg = check_frame_hash(&buf, FrameType::HeaderMetadata)
            .unwrap_err()
            .to_string();
        assert!(msg.contains("ENDF"));
    }

    /// Assembles a minimal frame: magic, three big-endian `u16` header
    /// fields (the last being `flags`), a big-endian `u64` total length,
    /// `body`, the 8-byte hash slot holding `slot_value`, and the end marker.
    fn build_header_frame(flags: u16, body: &[u8], slot_value: u64) -> Vec<u8> {
        use crate::wire::{FRAME_END, FRAME_HEADER_SIZE, FRAME_MAGIC};

        // 12 = 8-byte hash slot + 4-byte end marker appended below.
        let total_length = (FRAME_HEADER_SIZE + body.len() + 12) as u64;

        let mut frame = Vec::new();
        frame.extend_from_slice(FRAME_MAGIC);
        // NOTE(review): the two leading `1`s are presumably frame type and
        // version — confirm against the wire-format definition.
        for field in [1u16, 1u16, flags] {
            frame.extend_from_slice(&field.to_be_bytes());
        }
        frame.extend_from_slice(&total_length.to_be_bytes());
        frame.extend_from_slice(body);
        frame.extend_from_slice(&slot_value.to_be_bytes());
        frame.extend_from_slice(FRAME_END);
        frame
    }

    /// With the flag clear the slot is never consulted: zero, the correct
    /// digest and garbage all report "no hash".
    #[test]
    fn check_frame_hash_returns_false_when_flag_clear_regardless_of_slot() {
        let body = b"hello";
        let true_digest = xxhash_rust::xxh3::xxh3_64(body);

        for slot in [0, true_digest, 0xDEADBEEF] {
            let frame = build_header_frame(0, body, slot);
            assert!(!check_frame_hash(&frame, FrameType::HeaderMetadata).unwrap());
        }
    }

    /// Flag set and slot equal to the body digest verifies successfully.
    #[test]
    fn check_frame_hash_returns_true_on_valid_match() {
        let body = b"hello";
        let true_digest = xxhash_rust::xxh3::xxh3_64(body);
        let frame = build_header_frame(FrameFlags::HASH_PRESENT, body, true_digest);
        assert!(check_frame_hash(&frame, FrameType::HeaderMetadata).unwrap());
    }

    /// With the flag set, a slot value of zero is a digest like any other:
    /// it is compared against the real digest (here: of the empty body) and
    /// reported as a mismatch rather than being read as "no hash".
    #[test]
    fn check_frame_hash_accepts_legitimate_zero_digest() {
        let body: &[u8] = b"";
        let real_digest = xxhash_rust::xxh3::xxh3_64(body);
        assert_ne!(real_digest, 0, "test invariant: xxh3(\"\") != 0");

        let frame = build_header_frame(FrameFlags::HASH_PRESENT, body, 0);
        let err = check_frame_hash(&frame, FrameType::HeaderMetadata).unwrap_err();
        assert!(matches!(
            err,
            TensogramError::HashMismatch {
                object_index: None,
                ..
            }
        ));
    }

    /// A wrong stored digest produces `HashMismatch` with the stored value as
    /// `expected` and no object index.
    #[test]
    fn check_frame_hash_reports_mismatch_on_tampered_slot() {
        let body = b"hello";
        let frame = build_header_frame(FrameFlags::HASH_PRESENT, body, 0xDEADBEEFCAFEBABE);

        match check_frame_hash(&frame, FrameType::HeaderMetadata).unwrap_err() {
            TensogramError::HashMismatch {
                object_index,
                expected,
                actual,
            } => {
                assert!(object_index.is_none());
                assert_eq!(expected, "deadbeefcafebabe");
                assert_ne!(actual, expected);
            }
            other => panic!("expected HashMismatch, got: {other:?}"),
        }
    }

    /// The strict wrapper turns "no hash present" into `MissingHash`.
    #[test]
    fn verify_frame_hash_strict_wrapper_treats_clear_flag_as_missing_hash() {
        let body = b"hello";
        let frame = build_header_frame(0, body, 0);
        let err = verify_frame_hash(&frame, FrameType::HeaderMetadata, None).unwrap_err();
        assert!(matches!(err, TensogramError::MissingHash { .. }));
    }

    /// The strict wrapper succeeds on a frame with a correct hash.
    #[test]
    fn verify_frame_hash_strict_wrapper_passes_when_flag_set_and_slot_matches() {
        let body = b"hello";
        let true_digest = xxhash_rust::xxh3::xxh3_64(body);
        let frame = build_header_frame(FrameFlags::HASH_PRESENT, body, true_digest);
        verify_frame_hash(&frame, FrameType::HeaderMetadata, None).unwrap();
    }
}