use std::sync::Arc;
use vyre_foundation::ir::model::expr::Ident;
use vyre_foundation::ir::{BufferAccess, BufferDecl, DataType, Expr, Node, Program};
/// Modulus applied to both Adler-32 running sums.
///
/// RFC 1950 specifies 65521, the largest prime smaller than 65536.
pub const ADLER32_MOD: u32 = 65_521;
/// Identifier of this operation: used as the generator of the IR region built
/// by `adler32_program` and passed as the first argument of the harness entry.
pub const ADLER32_OP_ID: &str = "vyre-primitives::hash::adler32";
/// Summary of a run of bytes that can be appended to a preceding Adler-32 state.
///
/// Produced by `adler32_chunk`; consumed by `adler32_combine_state` and
/// `adler32_combine_chunks`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Adler32Chunk {
/// Number of summarized bytes, reduced modulo `ADLER32_MOD`.
pub len_mod: u32,
/// Running sum `a` (low half of the checksum) after hashing the run from the initial state.
pub a: u32,
/// Running sum `b` (high half of the checksum) after hashing the run from the initial state.
pub b: u32,
}
#[must_use]
pub fn adler32(bytes: &[u8]) -> u32 {
let chunk = adler32_chunk(bytes);
adler32_finalize_state(chunk.a, chunk.b)
}
/// Summarizes `bytes` as an [`Adler32Chunk`].
///
/// Starting from the initial `(a, b)` state, every byte is pushed through
/// `adler32_update_byte_state`. The returned summary also records the slice
/// length modulo `ADLER32_MOD`, which is what the combine helpers need.
#[must_use]
pub fn adler32_chunk(bytes: &[u8]) -> Adler32Chunk {
    let seed = (adler32_initial_a_state(), adler32_initial_b_state());
    let (a, b) = bytes
        .iter()
        .fold(seed, |(a, b), &byte| adler32_update_byte_state(a, b, byte));

    let modulus = ADLER32_MOD as usize;
    Adler32Chunk {
        // The remainder is below `ADLER32_MOD`, so narrowing to `u32` is lossless.
        len_mod: (bytes.len() % modulus) as u32,
        a,
        b,
    }
}
/// Appends a summarized `chunk` to the running state `(a, b)`.
///
/// Returns the `(a, b)` state of the concatenated input, i.e. the prefix that
/// produced `(a, b)` followed by the bytes summarized in `chunk`.
///
/// `chunk.a` and `chunk.b` were computed from the initial state, whose `a` is
/// 1. That seed is therefore removed once from the combined `a`, and
/// `len * (a - 1)` is added to the combined `b`. All arithmetic is widened to
/// `u64` and offset by the modulus so the subtraction never underflows.
#[must_use]
pub fn adler32_combine_state(a: u32, b: u32, chunk: Adler32Chunk) -> (u32, u32) {
    let base = u64::from(ADLER32_MOD);
    let (prefix_a, prefix_b) = (u64::from(a), u64::from(b));
    let (suffix_a, suffix_b) = (u64::from(chunk.a), u64::from(chunk.b));
    let suffix_len = u64::from(chunk.len_mod);

    // (a - 1) mod base, computed without going negative.
    let prefix_a_less_seed = (prefix_a + base - 1) % base;

    let joined_a = adler32_mod_u64(prefix_a + suffix_a + base - 1);
    let joined_b = adler32_mod_u64(prefix_b + suffix_b + suffix_len * prefix_a_less_seed);
    (joined_a, joined_b)
}
/// Concatenates two chunk summaries into the summary of `left` followed by `right`.
///
/// The `a`/`b` sums come from `adler32_combine_state`; the lengths are added
/// and reduced modulo `ADLER32_MOD`.
#[must_use]
pub fn adler32_combine_chunks(left: Adler32Chunk, right: Adler32Chunk) -> Adler32Chunk {
    // Widen before adding so the sum of the two lengths cannot overflow.
    let total_len = u64::from(left.len_mod) + u64::from(right.len_mod);
    let (a, b) = adler32_combine_state(left.a, left.b, right);
    Adler32Chunk {
        len_mod: adler32_mod_u64(total_len),
        a,
        b,
    }
}
/// Initial value of the `a` running sum (always 1).
#[must_use]
pub const fn adler32_initial_a_state() -> u32 {
    const SEED_A: u32 = 1;
    SEED_A
}
/// Initial value of the `b` running sum (always 0).
#[must_use]
pub const fn adler32_initial_b_state() -> u32 {
    const SEED_B: u32 = 0;
    SEED_B
}
#[must_use]
pub const fn adler32_update_byte_state(a: u32, b: u32, byte: u8) -> (u32, u32) {
let a = (a + byte as u32) % ADLER32_MOD;
let b = (b + a) % ADLER32_MOD;
(a, b)
}
/// Packs the two running sums into the final 32-bit checksum.
///
/// `b` is shifted into the upper 16 bits and OR-ed with `a`. No masking is
/// applied, so callers are expected to pass sums that fit in 16 bits.
#[must_use]
pub const fn adler32_finalize_state(a: u32, b: u32) -> u32 {
    let high_half = b << 16;
    high_half | a
}
/// Reduces a widened intermediate value modulo `ADLER32_MOD`.
fn adler32_mod_u64(value: u64) -> u32 {
    let modulus = u64::from(ADLER32_MOD);
    let residue = value % modulus;
    // The residue is below `ADLER32_MOD`, so it always fits in `u32`.
    residue as u32
}
/// IR constant holding the initial value of the `a` running sum.
///
/// Wraps `adler32_initial_a_state` so the IR and the CPU reference share one seed.
#[must_use]
pub fn adler32_initial_a_expr() -> Expr {
    let seed = adler32_initial_a_state();
    Expr::u32(seed)
}
/// IR constant holding the initial value of the `b` running sum.
///
/// Wraps `adler32_initial_b_state` so the IR and the CPU reference share one seed.
#[must_use]
pub fn adler32_initial_b_expr() -> Expr {
    let seed = adler32_initial_b_state();
    Expr::u32(seed)
}
/// Builds the two IR assignments that advance the Adler-32 state by one byte.
///
/// * `a_var` / `b_var` - names of the variables holding the running sums.
/// * `byte` - expression yielding the next input value; it is masked with
///   `0xFF` so only its low byte contributes.
///
/// The first node assigns `a_var = (a_var + byte) % ADLER32_MOD`; the second
/// assigns `b_var = (b_var + a_var) % ADLER32_MOD`. The nodes are returned in
/// that order, so the second one reads `a_var` after the first has written it
/// when they are emitted in sequence.
#[must_use]
pub fn adler32_update_byte_nodes(a_var: &str, b_var: &str, byte: Expr) -> [Node; 2] {
    let low_byte = Expr::bitand(byte, Expr::u32(0xFF));

    let next_a = Expr::rem(
        Expr::add(Expr::var(a_var), low_byte),
        Expr::u32(ADLER32_MOD),
    );
    let step_a = Node::assign(a_var, next_a);

    let next_b = Expr::rem(
        Expr::add(Expr::var(b_var), Expr::var(a_var)),
        Expr::u32(ADLER32_MOD),
    );
    let step_b = Node::assign(b_var, next_b);

    [step_a, step_b]
}
/// Builds the IR expression `(b << 16) | a`, the packed Adler-32 checksum.
///
/// Mirrors `adler32_finalize_state` for the CPU reference.
#[must_use]
pub fn adler32_finalize_expr(a: Expr, b: Expr) -> Expr {
    let high_half = Expr::shl(b, Expr::u32(16));
    Expr::bitor(high_half, a)
}
/// Builds the complete Adler-32 IR program.
///
/// * `input` - name of the read-only `u32` storage buffer (binding 0) holding
///   `n` elements; only the low byte of each element is hashed.
/// * `out` - name of the single-element `u32` output buffer (binding 1) that
///   receives the checksum.
/// * `n` - number of input elements to hash.
///
/// The body is wrapped in a single region tagged with `ADLER32_OP_ID`.
#[must_use]
pub fn adler32_program(input: &str, out: &str, n: u32) -> Program {
    let region = Node::Region {
        generator: Ident::from(ADLER32_OP_ID),
        source_region: None,
        body: Arc::new(adler32_body(input, out, n)),
    };

    let buffers = vec![
        BufferDecl::storage(input, 0, BufferAccess::ReadOnly, DataType::U32).with_count(n),
        BufferDecl::output(out, 1, DataType::U32).with_count(1),
    ];

    // NOTE(review): `[1, 1, 1]` is presumably the workgroup size; confirm against `Program::wrapped`.
    Program::wrapped(buffers, [1, 1, 1], vec![region])
}
/// Builds the region body: a serial Adler-32 scan guarded by an invocation check.
///
/// Under the guard `InvocationId(axis 0) == 0`, the body binds `a` and `b` to
/// their initial values, loops `i` from 0 to `n` applying the per-byte update
/// to `input[i]`, and stores the packed checksum into `out[0]`.
fn adler32_body(input: &str, out: &str, n: u32) -> Vec<Node> {
    let first_invocation = Expr::eq(Expr::InvocationId { axis: 0 }, Expr::u32(0));

    let bind_a = Node::let_bind("a", adler32_initial_a_expr());
    let bind_b = Node::let_bind("b", adler32_initial_b_expr());

    let per_byte = adler32_update_byte_nodes("a", "b", Expr::load(input, Expr::var("i")));
    let scan = Node::loop_for("i", Expr::u32(0), Expr::u32(n), per_byte.into());

    let checksum = adler32_finalize_expr(Expr::var("a"), Expr::var("b"));
    let publish = Node::store(out, Expr::u32(0), checksum);

    vec![Node::if_then(
        first_invocation,
        vec![bind_a, bind_b, scan, publish],
    )]
}
// Submits an `OpEntry` for this operation to `inventory`; compiled only when
// the `inventory-registry` feature is enabled.
#[cfg(feature = "inventory-registry")]
inventory::submit! {
crate::harness::OpEntry::new(
ADLER32_OP_ID,
// Program builder: three input elements, matching the "abc" fixture below.
|| adler32_program("input", "out", 3),
// NOTE(review): presumably the sample buffers - the packed bytes of "abc"
// plus a zeroed 4-byte slot for the output word; confirm against `OpEntry::new`.
Some(|| {
let bytes = crate::wire::pack_bytes_as_u32_slice(b"abc");
vec![vec![bytes, vec![0u8; 4]]]
}),
// NOTE(review): presumably the expected output - 0x024D_0127 (the Adler-32
// of "abc") as little-endian bytes; confirm against `OpEntry::new`.
Some(|| vec![vec![0x024D_0127u32.to_le_bytes().to_vec()]]),
)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Known-answer check for the three-byte input "abc".
    #[test]
    fn abc_matches_rfc1950_example() {
        let digest = adler32(b"abc");
        assert_eq!(digest, 0x024D_0127);
    }

    /// Known-answer check for the string "Wikipedia".
    #[test]
    fn wikipedia_string() {
        let digest = adler32(b"Wikipedia");
        assert_eq!(digest, 0x11E6_0398);
    }

    /// Driving the scalar state helpers by hand must agree with `adler32`.
    #[test]
    fn state_helpers_match_slice_hasher() {
        let bytes: &[u8] = b"vyre-adler-single-source";
        let seed = (adler32_initial_a_state(), adler32_initial_b_state());
        let (a, b) = bytes
            .iter()
            .fold(seed, |(a, b), &byte| adler32_update_byte_state(a, b, byte));
        assert_eq!(adler32_finalize_state(a, b), adler32(bytes));
    }

    /// A chunk summary carries the same sums as the slice hasher plus the length.
    #[test]
    fn chunk_summary_matches_slice_hasher() {
        let bytes: &[u8] = b"vyre-adler-gpu-tree-reduction";
        let Adler32Chunk { len_mod, a, b } = adler32_chunk(bytes);
        assert_eq!(adler32_finalize_state(a, b), adler32(bytes));
        assert_eq!(len_mod, bytes.len() as u32);
    }

    /// Splitting the input anywhere and combining the halves reproduces the serial hash.
    #[test]
    fn chunk_combine_matches_serial_hash_for_all_splits() {
        let bytes: &[u8] = b"adler chunks are composable enough for gpu block scans";
        for split in 0..=bytes.len() {
            let (head, tail) = bytes.split_at(split);
            let combined = adler32_combine_chunks(adler32_chunk(head), adler32_chunk(tail));
            assert_eq!(
                adler32_finalize_state(combined.a, combined.b),
                adler32(bytes),
                "split {split}"
            );
        }
    }

    /// Combining three chunks gives the same summary regardless of grouping.
    #[test]
    fn chunk_combine_is_associative_for_generated_payloads() {
        for len in 0..96usize {
            let bytes: Vec<u8> = (0..len)
                .map(|i| ((i * 37 + len * 11) & 0xFF) as u8)
                .collect();
            for split_a in 0..=len {
                for split_b in split_a..=len {
                    let head = adler32_chunk(&bytes[..split_a]);
                    let middle = adler32_chunk(&bytes[split_a..split_b]);
                    let tail = adler32_chunk(&bytes[split_b..]);

                    let left_assoc =
                        adler32_combine_chunks(adler32_combine_chunks(head, middle), tail);
                    let right_assoc =
                        adler32_combine_chunks(head, adler32_combine_chunks(middle, tail));

                    assert_eq!(
                        left_assoc, right_assoc,
                        "len {len}, splits {split_a}/{split_b}"
                    );
                    assert_eq!(
                        adler32_finalize_state(left_assoc.a, left_assoc.b),
                        adler32(&bytes),
                        "len {len}, splits {split_a}/{split_b}"
                    );
                }
            }
        }
    }

    /// The IR update helper must contain the `0xFF` mask in its debug rendering.
    #[test]
    fn update_helper_masks_high_input_bits() {
        let unmasked_slot = Expr::u32(0xFFFF_FF61);
        let rendered = format!(
            "{:?}",
            adler32_update_byte_nodes("a", "b", unmasked_slot)
        );
        assert!(
            rendered.contains("255"),
            "Fix: Adler-32 IR helper must mask each u32 slot to one byte."
        );
    }
}