#[derive(Debug, Clone)]
pub struct Adler32 {
a: u32,
b: u32,
#[allow(dead_code)]
block_size: usize,
n_mod_table: [u32; 256],
}
const MOD_ADLER: u32 = 65521;
impl Adler32 {
pub fn new(block_size: usize) -> Self {
let mut n_mod_table = [0u32; 256];
let n = block_size as u32;
for (i, entry) in n_mod_table.iter_mut().enumerate() {
*entry = (n * (i as u32)) % MOD_ADLER;
}
Self { a: 1, b: 0, block_size, n_mod_table }
}
pub fn hash(data: &[u8]) -> u32 {
let mut a: u32 = 1;
let mut b: u32 = 0;
const CHUNK_SIZE: usize = 3800;
for chunk in data.chunks(CHUNK_SIZE) {
for &byte in chunk {
a += byte as u32;
b += a;
}
a %= MOD_ADLER;
b %= MOD_ADLER;
}
(b << 16) | a
}
pub fn update_block(&mut self, block: &[u8]) {
self.a = 1;
self.b = 0;
const CHUNK_SIZE: usize = 3800;
for chunk in block.chunks(CHUNK_SIZE) {
for &byte in chunk {
self.a += byte as u32;
self.b += self.a;
}
self.a %= MOD_ADLER;
self.b %= MOD_ADLER;
}
}
#[inline]
pub fn roll(&mut self, old_byte: u8, new_byte: u8) {
let old = old_byte as u32;
let new = new_byte as u32;
let mut a = self.a + new;
if old > a {
a += MOD_ADLER;
}
a -= old;
if a >= MOD_ADLER {
a -= MOD_ADLER;
}
self.a = a;
let n_old_mod = self.n_mod_table[old_byte as usize];
let mut b = self.b + self.a;
let sub = n_old_mod + 1;
if sub > b {
b += MOD_ADLER * 2; }
b -= sub;
while b >= MOD_ADLER {
b -= MOD_ADLER;
}
self.b = b;
}
pub fn digest(&self) -> u32 {
(self.b << 16) | self.a
}
#[allow(dead_code)]
pub fn reset(&mut self) {
self.a = 1;
self.b = 0;
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_adler32_basic() {
let data = b"hello world";
let hash = Adler32::hash(data);
assert_ne!(hash, 0);
assert_ne!(hash, 1);
}
#[test]
fn test_adler32_deterministic() {
let data = b"test data 123";
assert_eq!(Adler32::hash(data), Adler32::hash(data));
}
#[test]
fn test_adler32_rolling() {
let data = b"abcdefghijklmnop";
let block_size = 4;
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..4]); let hash1 = hasher.digest();
hasher.roll(data[0], data[4]); let hash2 = hasher.digest();
let expected = Adler32::hash(&data[1..5]); assert_eq!(hash2, expected);
assert_ne!(hash1, hash2);
}
#[test]
fn test_adler32_rolling_correctness() {
let data = b"The quick brown fox jumps over the lazy dog";
let block_size = 8;
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..block_size]);
for i in 1..=(data.len() - block_size) {
hasher.roll(data[i - 1], data[i + block_size - 1]);
let expected = Adler32::hash(&data[i..i + block_size]);
assert_eq!(hasher.digest(), expected, "Rolling hash mismatch at position {}", i);
}
}
#[test]
fn test_adler32_different_data() {
assert_ne!(Adler32::hash(b"abc"), Adler32::hash(b"def"));
assert_ne!(Adler32::hash(b"test"), Adler32::hash(b"TEST"));
}
#[test]
fn test_adler32_empty() {
let hash = Adler32::hash(b"");
assert_eq!(hash, 1); }
#[test]
fn test_adler32_rolling_large_block() {
let block_size = 128 * 1024;
let mut data = vec![0u8; block_size * 2];
for (i, byte) in data.iter_mut().enumerate() {
*byte = (i % 256) as u8;
}
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..block_size]);
hasher.roll(data[0], data[block_size]);
let expected = Adler32::hash(&data[1..block_size + 1]);
assert_eq!(hasher.digest(), expected);
}
#[test]
fn test_adler32_rolling_all_zeros() {
let data = [0u8; 100];
let block_size = 10;
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..block_size]);
for i in 1..=(data.len() - block_size) {
hasher.roll(data[i - 1], data[i + block_size - 1]);
let expected = Adler32::hash(&data[i..i + block_size]);
assert_eq!(hasher.digest(), expected);
}
}
#[test]
fn test_adler32_rolling_all_ones() {
let data = [0xFF; 100];
let block_size = 16;
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..block_size]);
for i in 1..=(data.len() - block_size) {
hasher.roll(data[i - 1], data[i + block_size - 1]);
let expected = Adler32::hash(&data[i..i + block_size]);
assert_eq!(hasher.digest(), expected);
}
}
#[test]
fn test_adler32_rolling_repeating_pattern() {
let pattern = b"ABCD";
let mut data = Vec::with_capacity(100 * pattern.len());
for _ in 0..100 {
data.extend_from_slice(pattern);
}
let block_size = 32;
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..block_size]);
for i in 1..=(data.len() - block_size) {
hasher.roll(data[i - 1], data[i + block_size - 1]);
let expected = Adler32::hash(&data[i..i + block_size]);
assert_eq!(hasher.digest(), expected, "Mismatch at position {} with repeating pattern", i);
}
}
#[test]
fn test_adler32_rolling_modulo_boundary() {
let block_size = 256;
let data = vec![0xFF; block_size * 3];
let mut hasher = Adler32::new(block_size);
hasher.update_block(&data[0..block_size]);
for i in 1..block_size {
hasher.roll(data[i - 1], data[i + block_size - 1]);
let expected = Adler32::hash(&data[i..i + block_size]);
assert_eq!(hasher.digest(), expected, "Modulo boundary test failed at position {}", i);
}
}
}