/// Deterministic hash function over k-mers encoded with two bits per position.
///
/// Construction generates one pseudo-random 64-bit row per encoded bit from a
/// fixed seed, so two instances built with the same `kmer_length` always
/// produce identical hashes.
#[derive(Debug, Clone)]
pub struct MatrixHashFunction {
    /// Number of bits in an encoded k-mer (`kmer_length * 2`).
    size: usize,
    /// Pseudo-random rows, `size` of them, generated in `new`.
    matrix: Vec<u64>,
    /// Mask selecting the low `size` bits; all ones when `size >= 64`.
    size_mask: u64,
}

impl MatrixHashFunction {
    /// Fixed starting state of the row generator.
    const SEED: u64 = 0x1234_5678_9abc_def0;
    /// Multiplier of the linear congruential step used to derive rows.
    const LCG_MULTIPLIER: u64 = 1_103_515_245;
    /// Increment of the linear congruential step used to derive rows.
    const LCG_INCREMENT: u64 = 12_345;

    /// Builds the hash function for k-mers of `kmer_length` positions.
    ///
    /// The resulting `size` is `kmer_length * 2` and `size_mask` covers exactly
    /// that many low bits. When the encoded k-mer fills (or exceeds) a `u64`,
    /// the mask is all ones so that no bit of the hash is discarded.
    pub fn new(kmer_length: usize) -> Self {
        let bit_length = kmer_length * 2;

        // `1u64 << 64` would overflow, so the full-width case is handled
        // explicitly instead of being clamped to a 63-bit mask.
        let size_mask = if bit_length >= 64 {
            u64::MAX
        } else {
            (1u64 << bit_length) - 1
        };

        // Each row is the generator state *after* one step, starting from SEED.
        let mut state = Self::SEED;
        let matrix: Vec<u64> = (0..bit_length)
            .map(|_| {
                state = state
                    .wrapping_mul(Self::LCG_MULTIPLIER)
                    .wrapping_add(Self::LCG_INCREMENT);
                state
            })
            .collect();

        Self {
            size: bit_length,
            matrix,
            size_mask,
        }
    }

    /// Hashes an encoded k-mer.
    ///
    /// Returns `kmer_encoded` unchanged when `size` is zero. Otherwise the
    /// k-mer is XORed with the first matrix row and the result is reduced to
    /// the low `size` bits via `size_mask`.
    ///
    /// NOTE(review): only the first row takes part in the hash; the remaining
    /// rows are generated but not read here. Confirm whether a full
    /// matrix-vector product was intended before relying on mixing quality.
    pub fn hash(&self, kmer_encoded: u64) -> u64 {
        // `matrix` holds exactly `size` rows, so it is empty iff `size == 0`.
        match self.matrix.first() {
            None => kmer_encoded,
            Some(&row) => (kmer_encoded ^ row) & self.size_mask,
        }
    }

    /// Number of bits in an encoded k-mer (`kmer_length * 2`).
    pub fn size(&self) -> usize {
        self.size
    }

    /// Mask selecting the bits that `hash` may set.
    pub fn size_mask(&self) -> u64 {
        self.size_mask
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A 31-mer is encoded with two bits per position, i.e. 62 bits.
    #[test]
    fn test_matrix_hash_function() {
        let hash_fn = MatrixHashFunction::new(31);
        assert_eq!(hash_fn.size(), 62);
    }

    /// Hashing the same input twice yields the same value.
    #[test]
    fn test_hash_computation() {
        let hash_fn = MatrixHashFunction::new(8);
        let encoded = 0x12345678;

        let first = hash_fn.hash(encoded);
        let second = hash_fn.hash(encoded);

        assert_eq!(first, second);
    }

    /// Different k-mer lengths produce different bit sizes.
    #[test]
    fn test_different_kmer_sizes() {
        let short = MatrixHashFunction::new(21);
        let long = MatrixHashFunction::new(31);
        assert_ne!(short.size(), long.size());
    }

    /// The hash of a 1-mer never exceeds its two-bit range.
    #[test]
    fn test_hash_bounds() {
        const KMER_LENGTH: usize = 1;

        let hash_fn = MatrixHashFunction::new(KMER_LENGTH);
        let digest = hash_fn.hash(0b11);
        let upper_bound = 1u64 << (KMER_LENGTH * 2);

        assert!(digest < upper_bound);
    }
}