use std::marker::PhantomData;
use umash_sys as ffi;
/// A set of UMASH parameters, stored as the raw FFI `umash_params` value.
///
/// Instances are created with [`Params::new`] or [`Params::derive`], and
/// are borrowed by the `Hasher` and `Fingerprinter` values built from them.
#[derive(Clone)]
pub struct Params(ffi::umash_params);
/// Selects which of the two 64-bit values of a fingerprint to compute or
/// read.  The discriminant doubles as the index into [`Fingerprint::hash`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum UmashComponent {
    /// The primary hash value (index 0).
    Hash = 0,
    /// The secondary hash value (index 1).
    Secondary = 1,
}

/// A pair of 64-bit hash values: the primary hash at index 0 and the
/// secondary hash at index 1.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct Fingerprint {
    /// `[primary, secondary]`.
    pub hash: [u64; 2],
}

impl Fingerprint {
    /// Builds a fingerprint from its primary (`hash`) and `secondary`
    /// values.
    #[inline(always)]
    pub fn new(hash: u64, secondary: u64) -> Self {
        Self {
            hash: [hash, secondary],
        }
    }

    /// Returns the primary 64-bit value.
    #[inline(always)]
    pub fn hash(&self) -> u64 {
        let [primary, _] = self.hash;
        primary
    }

    /// Returns the secondary 64-bit value.
    #[inline(always)]
    pub fn secondary(&self) -> u64 {
        let [_, secondary] = self.hash;
        secondary
    }

    /// Returns the value selected by `which`.
    pub fn component(&self, which: UmashComponent) -> u64 {
        match which {
            UmashComponent::Hash => self.hash(),
            UmashComponent::Secondary => self.secondary(),
        }
    }
}
/// Incremental hashing state for a single UMASH component, producing a
/// 64-bit digest.
///
/// Wraps the raw FFI `umash_state`.  The `PhantomData<&'params Params>`
/// marker keeps the `Params` the state was initialised from borrowed for
/// `'params` (presumably because the C state refers back to those
/// parameters; confirm against the `umash_sys` bindings).
#[derive(Clone)]
pub struct Hasher<'params>(ffi::umash_state, PhantomData<&'params Params>);
/// Incremental fingerprinting state, producing a two-value `Fingerprint`.
///
/// Wraps the raw FFI `umash_fp_state`.  The `PhantomData<&'params Params>`
/// marker keeps the `Params` the state was initialised from borrowed for
/// `'params` (presumably because the C state refers back to those
/// parameters; confirm against the `umash_sys` bindings).
#[derive(Clone)]
pub struct Fingerprinter<'params>(ffi::umash_fp_state, PhantomData<&'params Params>);
impl Params {
    /// Returns a fresh set of parameters derived from per-thread random
    /// state.
    ///
    /// Each thread lazily fills a 32-byte key from `getrandom` the first
    /// time it calls this function, and keeps a counter next to it.  Every
    /// call derives parameters from the current counter value and the
    /// thread's key, then increments the counter (wrapping on overflow).
    ///
    /// # Panics
    ///
    /// Panics if `getrandom` fails while initialising the thread's key.
    pub fn new() -> Self {
        use std::cell::Cell;

        thread_local!(static RANDOM_STATE: ([u8; 32], Cell<u64>) = {
            let mut slice = [0u8; 32];
            getrandom::getrandom(&mut slice).expect("failed to generate 32 random bytes");
            (slice, Cell::new(0))
        });

        RANDOM_STATE.with(|state| {
            let counter = state.1.get();
            state.1.set(counter.wrapping_add(1));
            Params::derive(counter, &state.0)
        })
    }

    /// Returns parameters derived deterministically from `bits` and the
    /// first 32 bytes of `key`.
    ///
    /// Keys shorter than 32 bytes are zero-padded on the right; any bytes
    /// past the 32nd are ignored.
    pub fn derive(bits: u64, key: &[u8]) -> Self {
        // SAFETY: assumes `ffi::umash_params` is a plain C struct for which
        // the all-zero bit pattern is valid; it is overwritten by
        // `umash_params_derive` below.  TODO confirm against `umash_sys`.
        let mut params: Self = unsafe { std::mem::zeroed() };

        let mut key_vec = [0u8; 32];
        let to_copy = key.len().min(key_vec.len());
        // `copy_from_slice` panics unless both slices have the same length,
        // so the source must be truncated as well as the destination.
        key_vec[..to_copy].copy_from_slice(&key[..to_copy]);

        // SAFETY: `params.0` is a valid, exclusively borrowed destination
        // and `key_vec` is a live 32-byte buffer for the duration of the
        // call.
        unsafe {
            ffi::umash_params_derive(&mut params.0, bits, key_vec.as_ptr() as *const _);
        }
        params
    }

    /// Returns a `Hasher` for the primary (`UmashComponent::Hash`) value,
    /// initialised with `seed`.
    #[inline(always)]
    pub fn hasher(&self, seed: u64) -> Hasher {
        self.component_hasher(seed, UmashComponent::Hash)
    }

    /// Returns a `Hasher` for the secondary (`UmashComponent::Secondary`)
    /// value, initialised with `seed`.
    #[inline(always)]
    pub fn secondary_hasher(&self, seed: u64) -> Hasher {
        self.component_hasher(seed, UmashComponent::Secondary)
    }

    /// Returns a `Hasher` for the component selected by `which`,
    /// initialised with `seed`.
    #[inline(always)]
    pub fn component_hasher(&self, seed: u64, which: UmashComponent) -> Hasher {
        Hasher::with_params(self, seed, which)
    }

    /// Returns a `Fingerprinter` initialised with `seed`.
    #[inline(always)]
    pub fn fingerprinter(&self, seed: u64) -> Fingerprinter {
        Fingerprinter::with_params(self, seed)
    }

    /// Feeds `object` through a seed-0 primary hasher via its
    /// `std::hash::Hash` implementation and returns the digest.
    pub fn hash(&self, object: impl std::hash::Hash) -> u64 {
        let mut hasher = self.hasher(0);
        object.hash(&mut hasher);
        hasher.digest()
    }

    /// Feeds `object` through a seed-0 secondary hasher via its
    /// `std::hash::Hash` implementation and returns the digest.
    pub fn secondary(&self, object: impl std::hash::Hash) -> u64 {
        let mut hasher = self.secondary_hasher(0);
        object.hash(&mut hasher);
        hasher.digest()
    }

    /// Feeds `object` through a seed-0 fingerprinter via its
    /// `std::hash::Hash` implementation and returns the fingerprint.
    pub fn fingerprint(&self, object: impl std::hash::Hash) -> Fingerprint {
        let mut hasher = self.fingerprinter(0);
        object.hash(&mut hasher);
        hasher.digest()
    }
}
impl Default for Params {
#[inline(always)]
fn default() -> Self {
Self::new()
}
}
impl<'params> std::hash::BuildHasher for &'params Params {
type Hasher = Hasher<'params>;
fn build_hasher(&self) -> Hasher<'params> {
(*self).into()
}
}
impl<'params> Hasher<'params> {
    /// Creates a hashing state for component `which` of `params`,
    /// initialised with `seed`.
    #[inline(always)]
    fn with_params(params: &'params Params, seed: u64, which: UmashComponent) -> Self {
        // SAFETY: assumes `ffi::umash_state` is a plain C struct for which
        // the all-zero bit pattern is valid; it is initialised by
        // `umash_init` below.  TODO confirm against `umash_sys`.
        let mut state = Hasher(unsafe { std::mem::zeroed() }, PhantomData);
        // SAFETY: both pointers come from live references; `params` stays
        // borrowed for `'params`, which the returned value carries.
        unsafe {
            ffi::umash_init(&mut state.0, &params.0, seed, which as i32);
        }
        state
    }

    /// Feeds `bytes` into the hashing state and returns `self` so calls
    /// can be chained.
    #[inline(always)]
    pub fn write(&mut self, bytes: &[u8]) -> &mut Self {
        // SAFETY: the pointer/length pair describes the live `bytes` slice.
        unsafe {
            ffi::umash_sink_update(
                &mut self.0.sink,
                bytes.as_ptr() as *const _,
                bytes.len() as u64,
            );
        }
        self
    }

    /// Returns the 64-bit digest of the bytes written so far.  Takes
    /// `&self`, so the state can keep accepting writes afterwards.
    #[inline(always)]
    pub fn digest(&self) -> u64 {
        // SAFETY: `self.0` was initialised by `umash_init` in
        // `with_params`.
        unsafe { ffi::umash_digest(&self.0) }
    }
}
impl<'params> From<&'params Params> for Hasher<'params> {
#[inline(always)]
fn from(params: &'params Params) -> Hasher<'params> {
params.hasher(0)
}
}
/// Standard-library hasher interface, forwarding to the inherent
/// `digest` and `write` methods.
impl std::hash::Hasher for Hasher<'_> {
    /// Returns the current 64-bit digest.
    #[inline(always)]
    fn finish(&self) -> u64 {
        Self::digest(self)
    }

    /// Feeds `bytes` into the state; the chaining reference returned by
    /// the inherent `write` is discarded.
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) {
        let _ = Self::write(self, bytes);
    }
}
/// Byte-sink interface: everything written is consumed by the hasher.
impl std::io::Write for Hasher<'_> {
    /// Consumes all of `bytes` and reports the full length as written.
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) -> std::io::Result<usize> {
        let consumed = bytes.len();
        Self::write(self, bytes);
        Ok(consumed)
    }

    /// Always succeeds without doing anything.
    #[inline(always)]
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
impl<'params> Fingerprinter<'params> {
    /// Creates a fingerprinting state for `params`, initialised with
    /// `seed`.
    #[inline(always)]
    fn with_params(params: &'params Params, seed: u64) -> Self {
        // SAFETY: assumes `ffi::umash_fp_state` is a plain C struct for
        // which the all-zero bit pattern is valid; it is initialised by
        // `umash_fp_init` below.  TODO confirm against `umash_sys`.
        let mut state = Self(unsafe { std::mem::zeroed() }, PhantomData);
        // SAFETY: both pointers come from live references; `params` stays
        // borrowed for `'params`, which the returned value carries.
        unsafe {
            ffi::umash_fp_init(&mut state.0, &params.0, seed);
        }
        state
    }

    /// Feeds `bytes` into the fingerprinting state and returns `self` so
    /// calls can be chained.
    #[inline(always)]
    pub fn write(&mut self, bytes: &[u8]) -> &mut Self {
        // SAFETY: the pointer/length pair describes the live `bytes` slice.
        unsafe {
            ffi::umash_sink_update(
                &mut self.0.sink,
                bytes.as_ptr() as *const _,
                bytes.len() as u64,
            );
        }
        self
    }

    /// Returns the fingerprint of the bytes written so far.  Takes
    /// `&self`, so the state can keep accepting writes afterwards.
    #[inline(always)]
    pub fn digest(&self) -> Fingerprint {
        // SAFETY: `self.0` was initialised by `umash_fp_init` in
        // `with_params`.
        let fprint = unsafe { ffi::umash_fp_digest(&self.0) };
        Fingerprint { hash: fprint.hash }
    }
}
/// Converts a borrowed `Params` into a fingerprinter with seed 0.
impl<'params> From<&'params Params> for Fingerprinter<'params> {
    #[inline(always)]
    fn from(params: &'params Params) -> Self {
        Self::with_params(params, 0)
    }
}
impl std::hash::Hasher for Fingerprinter<'_> {
#[inline(always)]
fn finish(&self) -> u64 {
self.digest().hash()
}
#[inline(always)]
fn write(&mut self, bytes: &[u8]) {
Self::write(self, bytes);
}
}
/// Byte-sink interface: everything written is consumed by the
/// fingerprinter.
impl std::io::Write for Fingerprinter<'_> {
    /// Consumes all of `bytes` and reports the full length as written.
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) -> std::io::Result<usize> {
        let consumed = bytes.len();
        Self::write(self, bytes);
        Ok(consumed)
    }

    /// Always succeeds without doing anything.
    #[inline(always)]
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::{Fingerprint, Fingerprinter, Params, UmashComponent};

    /// Chained fingerprinter calls reproduce the known fingerprint for
    /// this key, seed and input.
    #[test]
    fn test_example_case() {
        let key = b"hello example.c";
        let input = b"the quick brown fox";
        let seed = 42u64;
        let my_params = Params::derive(0, key);
        let fprint = my_params.fingerprinter(seed).write(input).digest();
        assert_eq!(
            fprint,
            Fingerprint::new(0x398c5bb5cc113d03, 0x3a52693519575aba)
        );
        assert_eq!(fprint.hash(), 0x398c5bb5cc113d03);
        assert_eq!(fprint.secondary(), 0x3a52693519575aba);
        assert_eq!(fprint.component(UmashComponent::Hash), 0x398c5bb5cc113d03);
        assert_eq!(
            fprint.component(UmashComponent::Secondary),
            0x3a52693519575aba
        );
    }

    /// The primary hasher, secondary hasher and fingerprinter agree with
    /// each other on the same input.
    #[test]
    fn test_example_case_hashers() {
        use std::hash::Hasher as StdHasher;
        let key = b"hello example.c";
        let input = b"the quick brown fox";
        let seed = 42u64;
        let params = Params::derive(0, key);
        let mut hasher = params.hasher(seed);
        let mut secondary = params.secondary_hasher(seed);
        let mut fprint = params.fingerprinter(seed);
        hasher.write(input);
        secondary.write(input);
        fprint.write(input);
        assert_eq!(hasher.finish(), 0x398c5bb5cc113d03);
        assert_eq!(secondary.finish(), 0x3a52693519575aba);
        assert_eq!(
            fprint.digest(),
            Fingerprint::new(0x398c5bb5cc113d03, 0x3a52693519575aba)
        );
    }

    /// `component_hasher` with `Hash`, driven through `std::hash::Hasher`.
    #[test]
    fn test_example_case_with_separate_params() {
        use std::hash::Hasher as StdHasher;
        let params = Params::derive(0, b"hello example.c");
        let mut h = params.component_hasher(42, UmashComponent::Hash);
        StdHasher::write(&mut h, b"the quick brown fox");
        assert_eq!(h.finish(), 0x398c5bb5cc113d03);
    }

    /// `component_hasher` with `Secondary`, driven through
    /// `std::io::Write`; flushing must not change the digest.
    #[test]
    fn test_secondary_example_case_with_separate_params() {
        use std::io::Write;
        let params = Params::derive(0, b"hello example.c");
        let mut h = params.component_hasher(42, UmashComponent::Secondary);
        let message = b"the quick brown fox";
        assert_eq!(
            Write::write(&mut h, message).expect("must succeed"),
            message.len()
        );
        assert_eq!(h.digest(), 0x3a52693519575aba);
        h.flush().expect("must succeed");
        assert_eq!(h.digest(), 0x3a52693519575aba);
    }

    /// A fingerprinter used as a `std::hash::Hasher` reports the primary
    /// value from `finish`.
    #[test]
    fn test_another_case_with_separate_params() {
        use std::hash::Hasher as StdHasher;
        let params = Params::derive(0, b"backtrace");
        let mut h = params.fingerprinter(0xcd03);
        StdHasher::write(&mut h, b"the quick brown fox");
        assert_eq!(h.finish(), 0x931972393b291c81);
    }

    /// A fingerprinter used as a `std::io::Write`; flushing must not
    /// change the fingerprint.
    #[test]
    fn test_another_case_with_separate_params_as_write() {
        use std::io::Write;
        let params = Params::derive(0, b"backtrace");
        let mut h = params.fingerprinter(0xcd03);
        let message = b"the quick brown fox";
        assert_eq!(
            Write::write(&mut h, message).expect("must succeed"),
            message.len()
        );
        assert_eq!(
            h.digest(),
            Fingerprint::new(10599628788124425345, 10827422672915900785)
        );
        h.flush().expect("must succeed");
        assert_eq!(
            h.digest(),
            Fingerprint::new(10599628788124425345, 10827422672915900785)
        );
    }

    /// `From<&Params>` yields a seed-0 fingerprinter.
    #[test]
    fn test_into_fingerprinter_as_hasher() {
        use std::hash::Hasher as StdHasher;
        let params = Params::derive(0, b"backtrace");
        let mut h: Fingerprinter = (&params).into();
        StdHasher::write(&mut h, b"the quick brown fox");
        assert_eq!(h.finish(), 3130985775916891977);
    }

    /// The one-shot helpers are mutually consistent for randomly
    /// generated parameters.
    #[test]
    fn test_simple_hashes() {
        let params: Params = Default::default();
        let hash = params.hash(100i32);
        let secondary = params.secondary(100i32);
        let fingerprint = params.fingerprint(100i32);
        assert_ne!(hash, secondary);
        assert_eq!(fingerprint, Fingerprint::new(hash, secondary));
    }

    /// `&Params` works as the `BuildHasher` of a `HashMap`.
    #[test]
    fn test_hash_map() {
        use std::collections::HashMap;
        let params = Params::new();
        let mut map: HashMap<i32, i32, _> = HashMap::with_hasher(&params);
        map.insert(1, 2);
        assert_eq!(map.get(&1), Some(&2));
    }
}