const BASE: i128 = 256;
const HASHMAX: i128 = u64::MAX as i128;
pub fn hash_ascii(bytes: &[u8], k: usize) -> Vec<u64> {
let mut i = 0;
let mut bytes: Vec<u8> = bytes.to_vec();
if bytes.len() < k {
let more_bytes: Vec<u8> = vec![0u8; k - bytes.len()];
bytes.extend(more_bytes);
}
let mut hash: i128 = 0;
let mut hashes: Vec<u64> = Vec::new();
while i < bytes.len() - k {
let slice: &[u8] = &bytes[i..i+k+1];
if i == 0 {
for (j, c) in slice.iter().enumerate() {
let power: u32 = k as u32 - j as u32 + 1;
hash += BASE.pow(power)*(*c as i128);
hash %= HASHMAX;
}
hashes.push(hash as u64);
} else {
let discarded_value = bytes[i - 1];
hash -= BASE.pow(k as u32 + 1)*(discarded_value as i128);
hash += slice[k] as i128;
hash *= BASE;
hash %= HASHMAX;
hashes.push(hash as u64);
}
i += 1;
}
hashes
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
struct Hash {
value: u64,
position: usize,
}
pub fn fingerprint(bytes: &[u8], t: usize, k: usize) -> Vec<u64> {
let mut fingerprint: Vec<Hash> = Vec::new();
let raw_hashes: Vec<u64> = hash_ascii(bytes, k);
let mut hashes: Vec<Hash> = Vec::new();
for (i, value) in raw_hashes.iter().enumerate() {
let hash = Hash {
value: *value,
position: i,
};
hashes.push(hash);
}
let w: usize = t - k + 1;
let mut i = 0;
while i < hashes.len() - w + 1 {
let window = &hashes[i..i+w];
let minimum_hash = match window.iter().min() {
Some(value) => value,
None => {
return Vec::new()
}
};
if !fingerprint.contains(minimum_hash) {
fingerprint.push(*minimum_hash);
}
i += 1;
}
fingerprint.iter().map(|x| x.value).collect()
}