use std::hash::Hasher;
const CONST1: u64 = 0xb8e1afed6a267e96;
const CONST2: u64 = 0x082efa98ec4e6c89;
const CONST3: u64 = 0x243f6a8885a308d3;
const CONST4: u64 = 0x13198a2e03707344;
const CONST5: u64 = 0xa4093822299f31d0;
const CONST6: u64 = 0x452821e638d01377;
#[inline(always)]
fn fold_multiply(input: u64, const_1: u64) -> u64 {
let temp = (input as u128).wrapping_mul(const_1 as u128);
(temp as u64) ^ ((temp >> 64) as u64)
}
#[inline(always)]
unsafe fn read_u64(ptr: *const u8) -> u64 {
unsafe { ptr.cast::<u64>().read_unaligned() }
}
#[inline(always)]
unsafe fn read_u32(ptr: *const u8) -> u64 {
unsafe { ptr.cast::<u32>().read_unaligned() as u64 }
}
#[inline]
pub fn jamhash_u64(kmer: u64) -> u64 {
let part1 = kmer.rotate_left(16);
let part2 = kmer.rotate_left(48);
let fold1 = fold_multiply(part1, CONST1);
let fold2 = fold_multiply(part2, CONST2);
fold_multiply(fold1, fold2)
}
#[inline(always)]
fn rapidmix(v: u64, k: u64) -> u64 {
let m = v.wrapping_mul(k);
m ^ (m >> 47)
}
#[inline(always)]
fn jamhash_seeded(value: u64, seed: u64) -> u64 {
let v = value ^ CONST5;
let s = seed.wrapping_add(CONST4);
let mixed = rapidmix(v ^ s, CONST1) ^ rapidmix(v.rotate_left(32) ^ s.rotate_left(17), CONST2);
fold_multiply(mixed ^ s.rotate_left(41), CONST3)
}
#[inline(always)]
fn jam_short(bytes: &[u8], seed: u64) -> u64 {
let len = bytes.len();
let ptr = bytes.as_ptr();
let s = seed.wrapping_add(CONST4);
unsafe {
match len {
0 => jamhash_seeded(0, seed),
1..=3 => {
let combined = ((bytes[0] as u64) << 16)
| ((bytes[len >> 1] as u64) << 8)
| (bytes[len - 1] as u64);
let a = fold_multiply(combined ^ s, CONST1 ^ (len as u64));
fold_multiply(a ^ s.rotate_left(33), CONST2)
}
4..=7 => {
let lo = read_u32(ptr);
let hi = read_u32(ptr.add(len - 4));
let a = fold_multiply(lo ^ s, CONST1);
let b = fold_multiply(hi ^ s.rotate_left(33), CONST2);
fold_multiply(a ^ b, CONST3 ^ (len as u64))
}
8 => jamhash_seeded(read_u64(ptr), seed),
9..=16 => {
let lo = read_u64(ptr);
let hi = read_u64(ptr.add(len - 8));
let a = fold_multiply(lo ^ s, CONST1);
let b = fold_multiply(hi ^ s.rotate_left(33), CONST2);
fold_multiply(
a ^ b ^ lo.rotate_left(23) ^ hi.rotate_left(47),
CONST3 ^ (len as u64),
)
}
_ => unreachable!(),
}
}
}
#[inline(always)]
fn jam_medium(bytes: &[u8], seed: u64) -> u64 {
let len = bytes.len();
let ptr = bytes.as_ptr();
let s = seed.wrapping_add(CONST4);
let mut acc0 = s ^ CONST1;
let mut acc1 = s.rotate_left(33) ^ CONST2;
unsafe {
let mut i = 0;
while i + 16 <= len {
acc0 = fold_multiply(acc0 ^ read_u64(ptr.add(i)), CONST3);
acc1 = fold_multiply(acc1 ^ read_u64(ptr.add(i + 8)), CONST4);
i += 16;
}
if i < len {
acc0 = fold_multiply(acc0 ^ read_u64(ptr.add(len - 16)), CONST5);
acc1 = fold_multiply(acc1 ^ read_u64(ptr.add(len - 8)), CONST6);
}
fold_multiply(
acc0 ^ acc1.rotate_left(23) ^ s,
(acc1 ^ acc0.rotate_left(47)) ^ (len as u64),
)
}
}
#[cold]
#[inline(never)]
fn jam_long(bytes: &[u8], seed: u64) -> u64 {
let len = bytes.len();
let ptr = bytes.as_ptr();
if len >= 4096 {
return jam_ultra_long(bytes, seed);
}
let s = seed.wrapping_add(CONST4);
let mut s0 = s ^ CONST1;
let mut s1 = s.rotate_left(17) ^ CONST2;
let mut s2 = s.rotate_left(33) ^ CONST3;
let mut s3 = s.rotate_left(49) ^ CONST4;
unsafe {
let mut i = 0;
let end64 = len.saturating_sub(63);
let end32 = len - (len % 32);
while i < end64 {
s0 = fold_multiply(s0 ^ read_u64(ptr.add(i)), CONST1);
s1 = fold_multiply(s1 ^ read_u64(ptr.add(i + 8)), CONST2);
s2 = fold_multiply(s2 ^ read_u64(ptr.add(i + 16)), CONST3);
s3 = fold_multiply(s3 ^ read_u64(ptr.add(i + 24)), CONST4);
s0 = fold_multiply(s0 ^ read_u64(ptr.add(i + 32)), CONST5);
s1 = fold_multiply(s1 ^ read_u64(ptr.add(i + 40)), CONST6);
s2 = fold_multiply(s2 ^ read_u64(ptr.add(i + 48)), CONST1);
s3 = fold_multiply(s3 ^ read_u64(ptr.add(i + 56)), CONST2);
i += 64;
}
if i < end32 {
s0 = fold_multiply(s0 ^ read_u64(ptr.add(i)), CONST1);
s1 = fold_multiply(s1 ^ read_u64(ptr.add(i + 8)), CONST2);
s2 = fold_multiply(s2 ^ read_u64(ptr.add(i + 16)), CONST3);
s3 = fold_multiply(s3 ^ read_u64(ptr.add(i + 24)), CONST4);
i += 32;
}
if i < len {
s0 = fold_multiply(s0 ^ read_u64(ptr.add(len - 32)), CONST5);
s1 = fold_multiply(s1 ^ read_u64(ptr.add(len - 24)), CONST6);
s2 = fold_multiply(s2 ^ read_u64(ptr.add(len - 16)), CONST1);
s3 = fold_multiply(s3 ^ read_u64(ptr.add(len - 8)), CONST2);
}
let m0 = fold_multiply(s0 ^ s2.rotate_left(17), CONST5);
let m1 = fold_multiply(s1 ^ s3.rotate_left(31), CONST6);
fold_multiply(
m0 ^ m1.rotate_left(23) ^ s,
(m1 ^ m0.rotate_left(47)) ^ (len as u64),
)
}
}
#[cold]
#[inline(never)]
fn jam_ultra_long(bytes: &[u8], seed: u64) -> u64 {
let len = bytes.len();
let ptr = bytes.as_ptr();
let s = seed.wrapping_add(CONST4);
let mut a0 = s ^ CONST1;
let mut a1 = s.rotate_left(9) ^ CONST2;
let mut a2 = s.rotate_left(17) ^ CONST3;
let mut a3 = s.rotate_left(25) ^ CONST4;
let mut a4 = s.rotate_left(33) ^ CONST5;
let mut a5 = s.rotate_left(41) ^ CONST6;
let mut a6 = s.rotate_left(49) ^ CONST1;
let mut a7 = s.rotate_left(57) ^ CONST2;
unsafe {
let mut i = 0;
let end = len - (len % 64);
while i < end {
a0 = fold_multiply(a0 ^ read_u64(ptr.add(i)), CONST1);
a1 = fold_multiply(a1 ^ read_u64(ptr.add(i + 8)), CONST2);
a2 = fold_multiply(a2 ^ read_u64(ptr.add(i + 16)), CONST3);
a3 = fold_multiply(a3 ^ read_u64(ptr.add(i + 24)), CONST4);
a4 = fold_multiply(a4 ^ read_u64(ptr.add(i + 32)), CONST5);
a5 = fold_multiply(a5 ^ read_u64(ptr.add(i + 40)), CONST6);
a6 = fold_multiply(a6 ^ read_u64(ptr.add(i + 48)), CONST1);
a7 = fold_multiply(a7 ^ read_u64(ptr.add(i + 56)), CONST2);
i += 64;
}
if i < len {
let tail_ptr = ptr.add(len - 64);
a0 = fold_multiply(a0 ^ read_u64(tail_ptr), CONST3);
a1 = fold_multiply(a1 ^ read_u64(tail_ptr.add(8)), CONST4);
a2 = fold_multiply(a2 ^ read_u64(tail_ptr.add(16)), CONST5);
a3 = fold_multiply(a3 ^ read_u64(tail_ptr.add(24)), CONST6);
a4 = fold_multiply(a4 ^ read_u64(tail_ptr.add(32)), CONST1);
a5 = fold_multiply(a5 ^ read_u64(tail_ptr.add(40)), CONST2);
a6 = fold_multiply(a6 ^ read_u64(tail_ptr.add(48)), CONST3);
a7 = fold_multiply(a7 ^ read_u64(tail_ptr.add(56)), CONST4);
}
let m0 = fold_multiply(a0 ^ a4.rotate_left(17), CONST5);
let m1 = fold_multiply(a1 ^ a5.rotate_left(23), CONST6);
let m2 = fold_multiply(a2 ^ a6.rotate_left(31), CONST1);
let m3 = fold_multiply(a3 ^ a7.rotate_left(37), CONST2);
let n0 = fold_multiply(m0 ^ m2.rotate_left(19), CONST3);
let n1 = fold_multiply(m1 ^ m3.rotate_left(29), CONST4);
fold_multiply(
n0 ^ n1.rotate_left(23) ^ s,
(n1 ^ n0.rotate_left(47)) ^ (len as u64),
)
}
}
pub struct JamHasher {
accumulator: u64,
len: u64,
}
impl JamHasher {
#[inline]
pub fn new() -> Self {
Self {
accumulator: 0,
len: 0,
}
}
pub fn new_with_seed(seed: u64) -> Self {
Self {
accumulator: seed,
len: 0,
}
}
}
impl Default for JamHasher {
fn default() -> Self {
Self::new()
}
}
impl Hasher for JamHasher {
#[inline]
fn write(&mut self, bytes: &[u8]) {
self.len += bytes.len() as u64;
self.accumulator = match bytes.len() {
0..=16 => jam_short(bytes, self.accumulator),
17..=128 => jam_medium(bytes, self.accumulator),
_ => jam_long(bytes, self.accumulator),
};
}
#[inline]
fn finish(&self) -> u64 {
jamhash_u64(self.accumulator ^ self.len)
}
}
#[inline]
pub fn jamhash_bytes(bytes: &[u8]) -> u64 {
let len = bytes.len();
let ptr = bytes.as_ptr();
match len {
0 => 0,
1..=3 => {
let packed = ((bytes[0] as u64) << 16)
| ((bytes[len >> 1] as u64) << 8)
| (bytes[len - 1] as u64);
jamhash_u64(packed ^ (len as u64).wrapping_mul(CONST5))
}
4..=7 => {
let packed = unsafe { read_u32(ptr) | (read_u32(ptr.add(len - 4)) << 32) };
jamhash_u64(packed ^ (len as u64).wrapping_mul(CONST5))
}
8 => {
jamhash_u64(unsafe { read_u64(ptr) } ^ (8u64).wrapping_mul(CONST5))
}
9..=16 => {
let (lo, hi) = unsafe { (read_u64(ptr), read_u64(ptr.add(len - 8))) };
let temp = ((lo ^ CONST1) as u128).wrapping_mul((hi ^ CONST2) as u128);
let mixed = (temp as u64) ^ ((temp >> 64) as u64);
jamhash_u64(mixed ^ (len as u64).wrapping_mul(CONST5))
}
17..=128 => jamhash_u64(jam_medium(bytes, 0)),
_ => jamhash_u64(jam_long(bytes, 0)),
}
}
#[cfg(test)]
#[allow(clippy::needless_range_loop)]
mod tests {
use super::*;
#[test]
fn test_jamhash_u64_zero() {
assert_eq!(jamhash_u64(0), 0, "hash(0) should be 0");
}
#[test]
fn test_jamhash_u64_nonzero() {
let hash = jamhash_u64(0x1234567890abcdef);
assert_ne!(hash, 0);
}
#[test]
fn test_jamhash_bytes_consistency() {
let data = b"hello world";
let hash1 = jamhash_bytes(data);
let hash2 = jamhash_bytes(data);
assert_eq!(hash1, hash2, "same input should produce same hash");
}
#[test]
fn test_different_lengths() {
let hash1 = jamhash_bytes(&[0]);
let hash2 = jamhash_bytes(&[0, 0]);
let hash3 = jamhash_bytes(&[0, 0, 0]);
assert_ne!(hash1, hash2);
assert_ne!(hash2, hash3);
assert_ne!(hash1, hash3);
}
#[test]
fn test_partial_buffer_lengths() {
for len in 1..8 {
let data: Vec<u8> = (0..len).map(|i| i as u8).collect();
let hash = jamhash_bytes(&data);
assert_ne!(hash, 0, "hash of {:?} should not be zero", data);
}
}
#[test]
fn test_full_buffer() {
let data = [1u8, 2, 3, 4, 5, 6, 7, 8];
let hash = jamhash_bytes(&data);
assert_ne!(hash, 0);
}
#[test]
fn test_multiple_buffers() {
let data = [1u8; 24]; let hash = jamhash_bytes(&data);
assert_ne!(hash, 0);
}
#[test]
fn test_avalanche() {
let hash1 = jamhash_bytes(b"test");
let hash2 = jamhash_bytes(b"Test");
let diff = (hash1 ^ hash2).count_ones();
assert!(
diff > 10,
"avalanche effect too weak: only {} bits differ",
diff
);
}
#[test]
fn test_bit_flip_9_bytes() {
let base = [0u8; 9];
let base_hash = jamhash_bytes(&base);
for bit in 0..72 {
let mut modified = base;
modified[bit / 8] ^= 1 << (bit % 8);
let modified_hash = jamhash_bytes(&modified);
assert_ne!(
base_hash, modified_hash,
"Flipping bit {} in 9-byte input produced identical hash",
bit
);
}
}
#[test]
fn test_bit_flip_all_short_lengths() {
for len in 1..=16 {
let base: Vec<u8> = vec![0u8; len];
let base_hash = jamhash_bytes(&base);
for bit in 0..(len * 8) {
let mut modified = base.clone();
modified[bit / 8] ^= 1 << (bit % 8);
let modified_hash = jamhash_bytes(&modified);
assert_ne!(
base_hash, modified_hash,
"Flipping bit {} in {}-byte input produced identical hash",
bit, len
);
}
}
}
#[test]
fn test_cyclic_4_cycles_of_8_bytes() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let num_patterns = 10000;
for i in 0..num_patterns {
let pattern: [u8; 8] = [
(i & 0xFF) as u8,
((i >> 8) & 0xFF) as u8,
((i >> 16) & 0xFF) as u8,
((i >> 24) & 0xFF) as u8,
0,
0,
0,
0,
];
let mut data = [0u8; 32];
for cycle in 0..4 {
data[cycle * 8..(cycle + 1) * 8].copy_from_slice(&pattern);
}
let hash = jamhash_bytes(&data);
hashes.insert(hash);
}
let collision_rate = 1.0 - (hashes.len() as f64 / num_patterns as f64);
assert!(
collision_rate < 0.001,
"Cyclic 4x8 test: {} unique hashes from {} patterns ({:.2}% collision rate)",
hashes.len(),
num_patterns,
collision_rate * 100.0
);
}
#[test]
fn test_cyclic_8_cycles_of_4_bytes() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let num_patterns = 10000;
for i in 0..num_patterns {
let pattern: [u8; 4] = [
(i & 0xFF) as u8,
((i >> 8) & 0xFF) as u8,
((i >> 16) & 0xFF) as u8,
((i >> 24) & 0xFF) as u8,
];
let mut data = [0u8; 32];
for cycle in 0..8 {
data[cycle * 4..(cycle + 1) * 4].copy_from_slice(&pattern);
}
let hash = jamhash_bytes(&data);
hashes.insert(hash);
}
let collision_rate = 1.0 - (hashes.len() as f64 / num_patterns as f64);
assert!(
collision_rate < 0.001,
"Cyclic 8x4 test: {} unique hashes from {} patterns ({:.2}% collision rate)",
hashes.len(),
num_patterns,
collision_rate * 100.0
);
}
#[test]
fn test_cyclic_patterns_must_differ() {
let pattern_a: [u8; 32] = [
1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0,
];
let pattern_b: [u8; 32] = [
2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
0, 0, 0,
];
let hash_a = jamhash_bytes(&pattern_a);
let hash_b = jamhash_bytes(&pattern_b);
assert_ne!(
hash_a, hash_b,
"Different cyclic patterns produced identical hash: {:016x}",
hash_a
);
}
#[test]
fn test_sparse_32_byte_keys() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
for byte_pos in 0..32 {
for bit_pos in 0..8 {
let mut data = [0u8; 32];
data[byte_pos] = 1 << bit_pos;
let hash = jamhash_bytes(&data);
let inserted = hashes.insert(hash);
assert!(
inserted,
"Collision: bit {} of byte {} in 32-byte sparse key",
bit_pos, byte_pos
);
}
}
assert_eq!(
hashes.len(),
32 * 8,
"Expected 256 unique hashes for single-bit patterns"
);
}
#[test]
fn test_long_cyclic_64_bytes() {
let pattern_a: Vec<u8> = (0..8).cycle().take(64).collect();
let pattern_b: Vec<u8> = (1..9).cycle().take(64).collect();
let hash_a = jamhash_bytes(&pattern_a);
let hash_b = jamhash_bytes(&pattern_b);
assert_ne!(
hash_a, hash_b,
"Different 64-byte cyclic patterns produced identical hash"
);
}
#[test]
fn test_ultra_long_5kb() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let num_patterns = 100u64;
for i in 0u64..num_patterns {
let data: Vec<u8> = (0u64..5120)
.map(|j| (i.wrapping_add(j).wrapping_mul(0x9e3779b97f4a7c15) >> 56) as u8)
.collect();
let hash = jamhash_bytes(&data);
hashes.insert(hash);
}
let collision_rate = 1.0 - (hashes.len() as f64 / num_patterns as f64);
assert!(
collision_rate < 0.01,
"5KB data: {} unique from {} ({:.2}% collision)",
hashes.len(),
num_patterns,
collision_rate * 100.0
);
}
#[test]
fn test_ultra_long_bit_flip() {
let base: Vec<u8> = (0..5120).map(|i| (i * 7) as u8).collect();
let base_hash = jamhash_bytes(&base);
for pos in [0, 1000, 2000, 3000, 4000, 5000, 5119] {
let mut modified = base.clone();
modified[pos] ^= 1;
let modified_hash = jamhash_bytes(&modified);
let diff_bits = (base_hash ^ modified_hash).count_ones();
assert!(
diff_bits >= 20,
"Flip at pos {}: only {} bits differ (expected >=20)",
pos,
diff_bits
);
}
}
#[test]
fn test_long_cyclic_256_bytes() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let num_patterns = 1000;
for i in 0u64..num_patterns {
let mut base = [0u8; 32];
base[0] = (i & 0xFF) as u8;
base[1] = ((i >> 8) & 0xFF) as u8;
base[2] = ((i >> 16) & 0xFF) as u8;
base[3] = ((i >> 24) & 0xFF) as u8;
for j in 4..32 {
base[j] = ((i.wrapping_add(j as u64)) & 0xFF) as u8;
}
let data: Vec<u8> = base.iter().cycle().take(256).cloned().collect();
let hash = jamhash_bytes(&data);
hashes.insert(hash);
}
let collision_rate = 1.0 - (hashes.len() as f64 / num_patterns as f64);
assert!(
collision_rate < 0.01, "256-byte cyclic: {} unique from {} ({:.2}% collision)",
hashes.len(),
num_patterns,
collision_rate * 100.0
);
}
#[test]
fn test_xor_cancellation_bug() {
let pattern_a = [1u8; 16];
let pattern_b = [2u8; 16];
let mut data_a = [0u8; 32];
data_a[0..16].copy_from_slice(&pattern_a);
data_a[16..32].copy_from_slice(&pattern_a);
let mut data_b = [0u8; 32];
data_b[0..16].copy_from_slice(&pattern_b);
data_b[16..32].copy_from_slice(&pattern_b);
let hash_a = jamhash_bytes(&data_a);
let hash_b = jamhash_bytes(&data_b);
assert_ne!(
hash_a, hash_b,
"XOR cancellation bug detected! Different 32-byte inputs with 16-byte repeat \
produced identical hash: {:016x}",
hash_a
);
}
#[test]
fn test_xor_cancellation_jam_long() {
let pattern_a: [u8; 64] = [1u8; 64];
let pattern_b: [u8; 64] = [2u8; 64];
let data_a: Vec<u8> = pattern_a.iter().cycle().take(256).cloned().collect();
let data_b: Vec<u8> = pattern_b.iter().cycle().take(256).cloned().collect();
let hash_a = jamhash_bytes(&data_a);
let hash_b = jamhash_bytes(&data_b);
assert_ne!(
hash_a, hash_b,
"XOR cancellation bug in jam_long! Different 256-byte inputs produced identical hash: {:016x}",
hash_a
);
}
#[test]
fn test_smhasher_permutation_8bytes_zero_low_bit() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let mut collisions = Vec::new();
for bit in 0..8u32 {
let low_bit_val = 1u64 << bit;
let mut data_le = [0u8; 16];
data_le[8..16].copy_from_slice(&low_bit_val.to_le_bytes());
let hash_le = jamhash_bytes(&data_le);
if !hashes.insert(hash_le) {
collisions.push(format!("LE [0, 1<<{}]", bit));
}
let mut data_be = [0u8; 16];
data_be[0..8].copy_from_slice(&low_bit_val.to_be_bytes());
let hash_be = jamhash_bytes(&data_be);
if !hashes.insert(hash_be) {
collisions.push(format!("BE [0, 1<<{}]", bit));
}
let mut data_le_rev = [0u8; 16];
data_le_rev[0..8].copy_from_slice(&low_bit_val.to_le_bytes());
let hash_le_rev = jamhash_bytes(&data_le_rev);
if !hashes.insert(hash_le_rev) {
collisions.push(format!("LE [1<<{}, 0]", bit));
}
let mut data_be_rev = [0u8; 16];
data_be_rev[8..16].copy_from_slice(&low_bit_val.to_be_bytes());
let hash_be_rev = jamhash_bytes(&data_be_rev);
if !hashes.insert(hash_be_rev) {
collisions.push(format!("BE [1<<{}, 0]", bit));
}
}
assert!(
collisions.is_empty(),
"Permutation 8-bytes [0, low bit] collisions: {:?}",
collisions
);
}
#[test]
fn test_smhasher_permutation_8bytes_zero_high_bit() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let mut collisions = Vec::new();
for bit in 56..64u32 {
let high_bit_val = 1u64 << bit;
let patterns = [
(0u64, high_bit_val, "LE [0, high]"),
(high_bit_val, 0u64, "LE [high, 0]"),
];
for (v1, v2, name) in patterns {
let mut data = [0u8; 16];
data[0..8].copy_from_slice(&v1.to_le_bytes());
data[8..16].copy_from_slice(&v2.to_le_bytes());
let hash = jamhash_bytes(&data);
if !hashes.insert(hash) {
collisions.push(format!("{} bit {}", name, bit));
}
}
}
assert!(
collisions.is_empty(),
"Permutation 8-bytes [0, high bit] collisions: {:?}",
collisions
);
}
#[test]
fn test_smhasher_text_long_alnum_last() {
use std::collections::HashSet;
let base_sizes = [2048, 4096, 8192];
for &size in &base_sizes {
let mut hashes = HashSet::new();
let base: Vec<u8> = (0..size)
.map(|i| {
let c = i % 62;
if c < 10 {
b'0' + c as u8
} else if c < 36 {
b'A' + (c - 10) as u8
} else {
b'a' + (c - 36) as u8
}
})
.collect();
for last_byte in 0..=255u8 {
let mut data = base.clone();
data[size - 1] = last_byte;
let hash = jamhash_bytes(&data);
hashes.insert(hash);
}
assert_eq!(
hashes.len(),
256,
"Text long alnum size {}: expected 256 unique hashes, got {}",
size,
hashes.len()
);
}
}
#[test]
fn test_smhasher_perlin_noise_2d() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let grid_size = 256;
for y in 0..grid_size {
for x in 0..grid_size {
let mut data = [0u8; 8];
data[0..4].copy_from_slice(&(x as u32).to_le_bytes());
data[4..8].copy_from_slice(&(y as u32).to_le_bytes());
let hash = jamhash_bytes(&data);
hashes.insert(hash);
}
}
let total = grid_size * grid_size;
let collision_rate = 1.0 - (hashes.len() as f64 / total as f64);
assert!(
collision_rate < 0.0001, "PerlinNoise 2D: {} unique from {} ({:.4}% collision)",
hashes.len(),
total,
collision_rate * 100.0
);
}
#[test]
fn test_smhasher_seed_block_len() {
let test_lengths = [8, 9, 10, 11, 12, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31];
for &len in &test_lengths {
let data: Vec<u8> = (0..len).map(|i| i as u8).collect();
let mut hasher1 = JamHasher::new_with_seed(0);
hasher1.write(&data);
let hash1 = hasher1.finish();
let mut hasher2 = JamHasher::new_with_seed(1);
hasher2.write(&data);
let hash2 = hasher2.finish();
let mut hasher3 = JamHasher::new_with_seed(0x12345678);
hasher3.write(&data);
let hash3 = hasher3.finish();
assert_ne!(
hash1, hash2,
"SeedBlockLen {}: seed 0 and 1 produced same hash",
len
);
assert_ne!(
hash1, hash3,
"SeedBlockLen {}: seed 0 and 0x12345678 produced same hash",
len
);
assert_ne!(
hash2, hash3,
"SeedBlockLen {}: seed 1 and 0x12345678 produced same hash",
len
);
}
}
#[test]
fn test_smhasher_seed_block_offset() {
let data_len = 64;
for offset in 0..=5 {
let mut data: Vec<u8> = vec![0u8; data_len];
data[offset] = 0xFF;
let mut hasher1 = JamHasher::new_with_seed(0);
hasher1.write(&data);
let hash1 = hasher1.finish();
let mut hasher2 = JamHasher::new_with_seed(0x55555555);
hasher2.write(&data);
let hash2 = hasher2.finish();
assert_ne!(
hash1, hash2,
"SeedBlockOffset {}: different seeds produced same hash",
offset
);
let mut data_shifted = vec![0u8; data_len];
data_shifted[(offset + 1) % data_len] = 0xFF;
let mut hasher3 = JamHasher::new_with_seed(0);
hasher3.write(&data_shifted);
let hash3 = hasher3.finish();
assert_ne!(
hash1, hash3,
"SeedBlockOffset {}: different offsets with same seed produced same hash",
offset
);
}
}
#[test]
fn test_seed_avalanche() {
let data = b"test data for seed avalanche";
for seed_bit in 0..64 {
let seed1 = 0u64;
let seed2 = 1u64 << seed_bit;
let mut hasher1 = JamHasher::new_with_seed(seed1);
hasher1.write(data);
let hash1 = hasher1.finish();
let mut hasher2 = JamHasher::new_with_seed(seed2);
hasher2.write(data);
let hash2 = hasher2.finish();
let diff_bits = (hash1 ^ hash2).count_ones();
assert!(
diff_bits >= 20,
"Seed bit {} flip: only {} bits differ (expected ~32)",
seed_bit,
diff_bits
);
}
}
#[test]
fn test_8byte_sparse_patterns() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
for bit in 0..64 {
let val = 1u64 << bit;
let hash = jamhash_bytes(&val.to_le_bytes());
assert!(hashes.insert(hash), "8-byte single bit {} collision", bit);
}
for bit in 0..63 {
let val = 3u64 << bit;
let hash = jamhash_bytes(&val.to_le_bytes());
assert!(
hashes.insert(hash),
"8-byte two adjacent bits at {} collision",
bit
);
}
}
#[test]
fn test_strict_avalanche_criterion_8bytes() {
let num_samples = 1000;
let mut bit_flip_counts = [[0u32; 64]; 64];
for sample in 0..num_samples {
let base = (sample as u64).wrapping_mul(0x9e3779b97f4a7c15);
let base_hash = jamhash_bytes(&base.to_le_bytes());
for input_bit in 0..64 {
let flipped = base ^ (1u64 << input_bit);
let flipped_hash = jamhash_bytes(&flipped.to_le_bytes());
let diff = base_hash ^ flipped_hash;
for output_bit in 0..64 {
if (diff >> output_bit) & 1 == 1 {
bit_flip_counts[input_bit][output_bit] += 1;
}
}
}
}
let threshold = 0.35; let mut failures = Vec::new();
for input_bit in 0..64 {
for output_bit in 0..64 {
let flip_rate = bit_flip_counts[input_bit][output_bit] as f64 / num_samples as f64;
if (flip_rate - 0.5).abs() > threshold {
failures.push((input_bit, output_bit, flip_rate));
}
}
}
assert!(
failures.len() < 10, "SAC failures (input_bit, output_bit, flip_rate): {:?}",
&failures[..failures.len().min(20)]
);
}
#[test]
fn test_bit_independence_criterion() {
let num_samples = 10000;
let mut pair_flip_counts = [0u32; 64];
for sample in 0..num_samples {
let base = (sample as u64).wrapping_mul(0x9e3779b97f4a7c15);
let base_hash = jamhash_bytes(&base.to_le_bytes());
let flipped = base ^ 1;
let flipped_hash = jamhash_bytes(&flipped.to_le_bytes());
let diff = base_hash ^ flipped_hash;
let bit0_flipped = diff & 1;
for output_bit in 1..64 {
let bit_flipped = (diff >> output_bit) & 1;
if bit0_flipped == 1 && bit_flipped == 1 {
pair_flip_counts[output_bit] += 1;
}
}
}
let mut failures = Vec::new();
for output_bit in 1..64 {
let pair_rate = pair_flip_counts[output_bit] as f64 / num_samples as f64;
if !(0.10..=0.40).contains(&pair_rate) {
failures.push((output_bit, pair_rate));
}
}
assert!(
failures.len() < 5,
"BIC failures (output_bit, pair_rate): {:?}",
failures
);
}
#[test]
fn test_permutation_bias_8bytes_zero_patterns() {
let mut bit_counts = [0u32; 64];
let num_samples = 10000;
for i in 0..num_samples {
let mut data = [0u8; 16];
data[8..16].copy_from_slice(&(i as u64).to_le_bytes());
let hash = jamhash_bytes(&data);
for bit in 0..64 {
if (hash >> bit) & 1 == 1 {
bit_counts[bit] += 1;
}
}
}
let mut biased_bits = Vec::new();
for bit in 0..64 {
let rate = bit_counts[bit] as f64 / num_samples as f64;
if (rate - 0.5).abs() > 0.05 {
biased_bits.push((bit, rate));
}
}
assert!(
biased_bits.len() < 5,
"Permutation [0, X] bias: bits with >5% deviation from 50%: {:?}",
biased_bits
);
}
#[test]
fn test_seed_mixing_quality() {
let data = [0x42u8; 32];
let mut total_diff_bits = 0u32;
let mut min_diff = 64u32;
let mut max_diff = 0u32;
for seed_bit in 0..64 {
let seed1 = 0u64;
let seed2 = 1u64 << seed_bit;
let mut h1 = JamHasher::new_with_seed(seed1);
h1.write(&data);
let hash1 = h1.finish();
let mut h2 = JamHasher::new_with_seed(seed2);
h2.write(&data);
let hash2 = h2.finish();
let diff = (hash1 ^ hash2).count_ones();
total_diff_bits += diff;
min_diff = min_diff.min(diff);
max_diff = max_diff.max(diff);
}
let avg_diff = total_diff_bits as f64 / 64.0;
assert!(
avg_diff >= 25.0,
"Seed mixing too weak: avg {} bit diff (expected ~32)",
avg_diff
);
assert!(
min_diff >= 15,
"Seed mixing has weak bit: min {} diff (expected >=20)",
min_diff
);
}
#[test]
fn test_smhasher_twobytes_exhaustive() {
use std::collections::HashSet;
let mut hashes = HashSet::new();
let mut collisions = Vec::new();
for a in 0u8..=255 {
for b in 0u8..=255 {
let hash = jamhash_bytes(&[a, b]);
if !hashes.insert(hash) {
collisions.push((a, b, hash));
}
}
}
assert!(
collisions.is_empty(),
"TwoBytes collisions found: {:?}",
&collisions[..collisions.len().min(10)]
);
assert_eq!(hashes.len(), 65536, "Expected 65536 unique hashes");
}
#[test]
fn test_smhasher_twobytes_bit_distribution() {
let mut bit_counts = [0u32; 64];
for a in 0u8..=255 {
for b in 0u8..=255 {
let hash = jamhash_bytes(&[a, b]);
for bit in 0..64 {
if (hash >> bit) & 1 == 1 {
bit_counts[bit] += 1;
}
}
}
}
let mut biased_bits = Vec::new();
for (bit, &count) in bit_counts.iter().enumerate() {
let rate = count as f64 / 65536.0;
if (rate - 0.5).abs() > 0.05 {
biased_bits.push((bit, rate));
}
}
assert!(
biased_bits.is_empty(),
"TwoBytes bit distribution failures (bit, rate): {:?}",
biased_bits
);
}
#[test]
fn test_smhasher_twobytes_avalanche() {
let mut total_flips = 0u64;
let mut min_flips = 64u32;
let mut samples = 0u64;
let mut worst_case = (0u8, 0u8, 0usize);
for a in 0u8..=255 {
for b in 0u8..=255 {
let base_hash = jamhash_bytes(&[a, b]);
for bit in 0..16 {
let (na, nb) = if bit < 8 {
(a ^ (1 << bit), b)
} else {
(a, b ^ (1 << (bit - 8)))
};
let flipped_hash = jamhash_bytes(&[na, nb]);
let diff_bits = (base_hash ^ flipped_hash).count_ones();
total_flips += diff_bits as u64;
if diff_bits < min_flips {
min_flips = diff_bits;
worst_case = (a, b, bit);
}
samples += 1;
}
}
}
let avg_flips = total_flips as f64 / samples as f64;
let _ = worst_case;
assert!(
(28.0..=36.0).contains(&avg_flips),
"TwoBytes avalanche: avg {} bits flipped (expected ~32)",
avg_flips
);
assert!(
min_flips >= 12,
"TwoBytes avalanche: min {} bits (expected >=12)",
min_flips
);
}
#[test]
fn test_sequential_distribution() {
let num_samples = 100000;
let num_buckets = 256; let mut bucket_counts = vec![0u32; num_buckets];
for i in 0..num_samples {
let hash = jamhash_bytes(&(i as u64).to_le_bytes());
let bucket = (hash as usize) % num_buckets;
bucket_counts[bucket] += 1;
}
let expected = num_samples as f64 / num_buckets as f64;
let chi_sq: f64 = bucket_counts
.iter()
.map(|&c| {
let diff = c as f64 - expected;
diff * diff / expected
})
.sum();
assert!(
chi_sq < 350.0,
"Sequential distribution chi-sq {} (expected < 310)",
chi_sq
);
}
#[test]
fn test_seed_block_len_statistical() {
let test_lengths = [8, 9, 10, 11, 12, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31];
let num_seeds = 100;
for &len in &test_lengths {
let data: Vec<u8> = (0..len).map(|i| i as u8).collect();
let mut hashes = Vec::with_capacity(num_seeds);
for seed in 0..num_seeds {
let mut hasher = JamHasher::new_with_seed(seed as u64);
hasher.write(&data);
hashes.push(hasher.finish());
}
for i in 0..num_seeds {
for j in (i + 1)..num_seeds {
assert_ne!(
hashes[i], hashes[j],
"SeedBlockLen {}: seeds {} and {} produced same hash",
len, i, j
);
}
}
let mut bit_counts = [0u32; 64];
for &hash in &hashes {
for bit in 0..64 {
if (hash >> bit) & 1 == 1 {
bit_counts[bit] += 1;
}
}
}
for bit in 0..64 {
let rate = bit_counts[bit] as f64 / num_seeds as f64;
assert!(
(0.2..=0.8).contains(&rate),
"SeedBlockLen {}: bit {} has rate {} (expected ~0.5)",
len,
bit,
rate
);
}
}
}
}