#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RollingChecksum {
a: u32,
b: u32,
count: usize,
}
impl RollingChecksum {
const MOD: u32 = 65521;
#[must_use]
#[allow(clippy::cast_possible_truncation)]
pub fn new(data: &[u8]) -> Self {
let mut a: u32 = 0;
let mut b: u32 = 0;
let len = data.len();
for (i, &byte) in data.iter().enumerate() {
a = a.wrapping_add(u32::from(byte));
b = b.wrapping_add((len - i) as u32 * u32::from(byte));
}
Self {
a: a % Self::MOD,
b: b % Self::MOD,
count: len,
}
}
#[must_use]
pub const fn empty() -> Self {
Self {
a: 0,
b: 0,
count: 0,
}
}
#[inline]
#[allow(clippy::cast_possible_truncation)]
pub fn roll(&mut self, old_byte: u8, new_byte: u8) {
let old = u32::from(old_byte);
let new = u32::from(new_byte);
self.a = (self.a.wrapping_sub(old).wrapping_add(new)) % Self::MOD;
self.b = (self
.b
.wrapping_sub(self.count as u32 * old)
.wrapping_add(self.a))
% Self::MOD;
}
#[inline]
pub fn push(&mut self, byte: u8) {
let val = u32::from(byte);
self.a = (self.a.wrapping_add(val)) % Self::MOD;
self.b = (self.b.wrapping_add(self.a)) % Self::MOD;
self.count += 1;
}
#[inline]
#[must_use]
pub const fn digest(&self) -> u32 {
(self.b << 16) | self.a
}
#[inline]
#[must_use]
pub const fn len(&self) -> usize {
self.count
}
#[inline]
#[must_use]
pub const fn is_empty(&self) -> bool {
self.count == 0
}
#[inline]
#[must_use]
pub const fn sum_a(&self) -> u32 {
self.a
}
#[inline]
#[must_use]
pub const fn sum_b(&self) -> u32 {
self.b
}
}
#[derive(Debug, Clone, Copy)]
pub struct FastRollingChecksum {
a: u64,
b: u64,
count: usize,
rolls: u32,
}
impl FastRollingChecksum {
const MOD: u64 = 65521;
const NORMALIZE_INTERVAL: u32 = 5000;
#[must_use]
#[allow(clippy::cast_possible_truncation)]
pub fn new(data: &[u8]) -> Self {
let mut a: u64 = 0;
let mut b: u64 = 0;
let len = data.len();
for (i, &byte) in data.iter().enumerate() {
a += u64::from(byte);
b += (len - i) as u64 * u64::from(byte);
}
Self {
a: a % Self::MOD,
b: b % Self::MOD,
count: len,
rolls: 0,
}
}
#[must_use]
pub const fn empty() -> Self {
Self {
a: 0,
b: 0,
count: 0,
rolls: 0,
}
}
#[inline]
#[allow(clippy::cast_possible_truncation)]
pub fn roll(&mut self, old_byte: u8, new_byte: u8) {
let old = u64::from(old_byte);
let new = u64::from(new_byte);
self.a = self.a + Self::MOD + new - old;
self.b = self.b + Self::MOD * (self.count as u64) + self.a - self.count as u64 * old;
self.rolls += 1;
if self.rolls >= Self::NORMALIZE_INTERVAL {
self.a %= Self::MOD;
self.b %= Self::MOD;
self.rolls = 0;
}
}
#[inline]
pub fn push(&mut self, byte: u8) {
let val = u64::from(byte);
self.a += val;
self.b += self.a;
self.count += 1;
self.rolls += 1;
if self.rolls >= Self::NORMALIZE_INTERVAL {
self.a %= Self::MOD;
self.b %= Self::MOD;
self.rolls = 0;
}
}
#[inline]
#[must_use]
#[allow(clippy::cast_possible_truncation)]
pub fn digest(&self) -> u32 {
let a = (self.a % Self::MOD) as u32;
let b = (self.b % Self::MOD) as u32;
(b << 16) | a
}
#[inline]
#[must_use]
pub const fn len(&self) -> usize {
self.count
}
#[inline]
#[must_use]
pub const fn is_empty(&self) -> bool {
self.count == 0
}
}
impl Default for RollingChecksum {
fn default() -> Self {
Self::empty()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn new_empty_slice() {
let checksum = RollingChecksum::new(b"");
assert_eq!(checksum.digest(), 0);
assert_eq!(checksum.len(), 0);
assert!(checksum.is_empty());
}
#[test]
fn new_single_byte() {
let checksum = RollingChecksum::new(b"a");
assert_ne!(checksum.digest(), 0);
assert_eq!(checksum.len(), 1);
assert!(!checksum.is_empty());
}
#[test]
fn new_multiple_bytes() {
let checksum = RollingChecksum::new(b"hello");
assert_ne!(checksum.digest(), 0);
assert_eq!(checksum.len(), 5);
}
#[test]
fn empty_constructor() {
let checksum = RollingChecksum::empty();
assert_eq!(checksum.digest(), 0);
assert_eq!(checksum.len(), 0);
assert!(checksum.is_empty());
}
#[test]
fn default_is_empty() {
let checksum = RollingChecksum::default();
assert_eq!(checksum, RollingChecksum::empty());
}
#[test]
fn digest_deterministic() {
let data = b"test data for checksum";
let checksum1 = RollingChecksum::new(data);
let checksum2 = RollingChecksum::new(data);
assert_eq!(checksum1.digest(), checksum2.digest());
}
#[test]
fn different_data_different_digest() {
let checksum1 = RollingChecksum::new(b"hello");
let checksum2 = RollingChecksum::new(b"world");
assert_ne!(checksum1.digest(), checksum2.digest());
}
#[test]
fn sum_components_accessible() {
let checksum = RollingChecksum::new(b"test");
assert!(checksum.sum_a() < RollingChecksum::MOD);
assert!(checksum.sum_b() < RollingChecksum::MOD);
}
#[test]
fn roll_single_byte_window() {
let mut checksum = RollingChecksum::new(b"a");
checksum.roll(b'a', b'b');
let direct = RollingChecksum::new(b"b");
assert_eq!(checksum.digest(), direct.digest());
}
#[test]
fn roll_preserves_window_size() {
let mut checksum = RollingChecksum::new(b"abcd");
let original_len = checksum.len();
checksum.roll(b'a', b'e');
assert_eq!(checksum.len(), original_len);
}
#[test]
fn roll_multiple_times() {
let mut checksum = RollingChecksum::new(b"abcd");
checksum.roll(b'a', b'e');
checksum.roll(b'b', b'f');
let direct = RollingChecksum::new(b"cdef");
assert_eq!(checksum.digest(), direct.digest());
}
#[test]
fn roll_full_window_replacement() {
let mut checksum = RollingChecksum::new(b"aaaa");
checksum.roll(b'a', b'b');
checksum.roll(b'a', b'b');
checksum.roll(b'a', b'b');
checksum.roll(b'a', b'b');
let direct = RollingChecksum::new(b"bbbb");
assert_eq!(checksum.digest(), direct.digest());
}
#[test]
fn roll_with_same_byte() {
let mut checksum = RollingChecksum::new(b"aaaa");
let original = checksum.digest();
checksum.roll(b'a', b'a');
assert_eq!(checksum.digest(), original);
}
#[test]
fn push_to_empty() {
let mut checksum = RollingChecksum::empty();
checksum.push(b'a');
assert_eq!(checksum.len(), 1);
assert_ne!(checksum.digest(), 0);
}
#[test]
fn push_multiple() {
let mut checksum = RollingChecksum::empty();
for &byte in b"hello" {
checksum.push(byte);
}
assert_eq!(checksum.len(), 5);
}
#[test]
fn push_increases_length() {
let mut checksum = RollingChecksum::new(b"test");
let original_len = checksum.len();
checksum.push(b'!');
assert_eq!(checksum.len(), original_len + 1);
}
#[test]
fn all_zeros() {
let data = [0u8; 100];
let checksum = RollingChecksum::new(&data);
assert_eq!(checksum.digest(), 0);
}
#[test]
fn all_ones() {
let data = [1u8; 100];
let checksum = RollingChecksum::new(&data);
assert_ne!(checksum.digest(), 0);
}
#[test]
fn all_max_bytes() {
let data = [255u8; 100];
let checksum = RollingChecksum::new(&data);
assert_ne!(checksum.digest(), 0);
assert!(checksum.sum_a() < RollingChecksum::MOD);
assert!(checksum.sum_b() < RollingChecksum::MOD);
}
#[test]
fn large_window() {
let data = vec![42u8; 65536]; let checksum = RollingChecksum::new(&data);
assert_eq!(checksum.len(), 65536);
assert!(checksum.sum_a() < RollingChecksum::MOD);
assert!(checksum.sum_b() < RollingChecksum::MOD);
}
#[test]
fn binary_data() {
let data: Vec<u8> = (0..=255).collect();
let checksum = RollingChecksum::new(&data);
assert_eq!(checksum.len(), 256);
assert_ne!(checksum.digest(), 0);
}
#[test]
fn clone_equals_original() {
let original = RollingChecksum::new(b"test data");
let cloned = original;
assert_eq!(original, cloned);
assert_eq!(original.digest(), cloned.digest());
}
#[test]
fn debug_format() {
let checksum = RollingChecksum::new(b"test");
let debug = format!("{checksum:?}");
assert!(debug.contains("RollingChecksum"));
assert!(debug.contains("a:"));
assert!(debug.contains("b:"));
}
#[test]
fn mod_invariant_a() {
let data = vec![255u8; 10000];
let checksum = RollingChecksum::new(&data);
assert!(checksum.sum_a() < RollingChecksum::MOD);
}
#[test]
fn mod_invariant_b() {
let data = vec![255u8; 10000];
let checksum = RollingChecksum::new(&data);
assert!(checksum.sum_b() < RollingChecksum::MOD);
}
#[test]
fn roll_maintains_mod_invariant() {
let mut checksum = RollingChecksum::new(&[255u8; 1000]);
for _ in 0..1000 {
checksum.roll(255, 255);
assert!(checksum.sum_a() < RollingChecksum::MOD);
assert!(checksum.sum_b() < RollingChecksum::MOD);
}
}
}
#[cfg(test)]
mod proptests {
use super::*;
use proptest::prelude::*;
proptest! {
#[test]
fn roll_preserves_length(
data in prop::collection::vec(any::<u8>(), 1..100),
old in any::<u8>(),
new in any::<u8>()
) {
let mut checksum = RollingChecksum::new(&data);
let original_len = checksum.len();
checksum.roll(old, new);
prop_assert_eq!(checksum.len(), original_len);
}
#[test]
fn a_always_bounded(data in prop::collection::vec(any::<u8>(), 0..1000)) {
let checksum = RollingChecksum::new(&data);
prop_assert!(checksum.sum_a() < RollingChecksum::MOD);
}
#[test]
fn b_always_bounded(data in prop::collection::vec(any::<u8>(), 0..1000)) {
let checksum = RollingChecksum::new(&data);
prop_assert!(checksum.sum_b() < RollingChecksum::MOD);
}
#[test]
fn deterministic(data in prop::collection::vec(any::<u8>(), 0..500)) {
let checksum1 = RollingChecksum::new(&data);
let checksum2 = RollingChecksum::new(&data);
prop_assert_eq!(checksum1.digest(), checksum2.digest());
}
#[test]
fn empty_is_zero(_unused in 0..1i32) {
let checksum = RollingChecksum::new(&[]);
prop_assert_eq!(checksum.digest(), 0);
}
#[test]
fn push_maintains_bounds(
initial in prop::collection::vec(any::<u8>(), 0..100),
to_push in prop::collection::vec(any::<u8>(), 1..100)
) {
let mut checksum = RollingChecksum::new(&initial);
for byte in to_push {
checksum.push(byte);
prop_assert!(checksum.sum_a() < RollingChecksum::MOD);
prop_assert!(checksum.sum_b() < RollingChecksum::MOD);
}
}
#[test]
fn roll_and_push_no_panic(
data in prop::collection::vec(any::<u8>(), 2..50),
operations in prop::collection::vec((any::<bool>(), any::<u8>(), any::<u8>()), 0..100)
) {
let mut checksum = RollingChecksum::new(&data);
for (is_roll, byte1, byte2) in operations {
if is_roll && !checksum.is_empty() {
checksum.roll(byte1, byte2);
} else {
checksum.push(byte1);
}
prop_assert!(checksum.sum_a() < RollingChecksum::MOD);
prop_assert!(checksum.sum_b() < RollingChecksum::MOD);
}
}
}
}