float_hash_of/
lib.rs

1use hash_of::*;
2use std::borrow::Borrow;
3use std::hash::Hash;
4use std::marker::PhantomData;
5use std::num::NonZeroU64;
6
7/// Takes a 64 bit hash, and makes a primitive 63 bit hash using a double.
8/// This allows for easy comparison without keeping making heap allocations in JavaScript (eg: string) or requiring low entropy (int)
9#[derive(Eq, PartialEq, Debug, Hash)]
10pub struct FloatHashOf<T> {
11    // There's no such thing as a NonZeroF64, so store as NonZeroU64 and transmute when necessary.
12    // This let's us store it in Option without increasing the size.
13    hash: NonZeroU64,
14    _marker: PhantomData<*const T>, // Indicate we do not own T
15}
16
17// Manually implementing Copy/Clone because they are not automatically derived
18// if T does not equal Copy/Clone
19impl<T> Clone for FloatHashOf<T> {
20    fn clone(&self) -> Self {
21        *self
22    }
23}
24
25impl<T> Copy for FloatHashOf<T> {}
26
27impl<T> FloatHashOf<T> {
28    #[inline]
29    pub fn into_inner(self) -> f64 {
30        f64::from_bits(self.hash.get())
31    }
32}
33
34// We want to avoid NaN (unexpected equality rules), subnormals (they may be slow?),
35// and +-0 (inconsistent equality rules, useful as a null hash)
36// so ensure there is at least one each of 0 and 1 in the exponent to make those cases impossible.
37// This also rules out +-INF
38fn hash_63_bits(hash: u64) -> u64 {
39    #![allow(clippy::inconsistent_digit_grouping)] // Grouping matches 64bit IEEE 754 float
40
41    // TODO: This assumes little-endian, but we could cgf the big-endian format in
42    const EXP_2: u64 = 0b0_11000000000_0000000000000000000000000000000000000000000000000000;
43    const EXP_1: u64 = 0b0_10000000000_0000000000000000000000000000000000000000000000000000;
44    const EXP_0: u64 = 0b0_00000000000_0000000000000000000000000000000000000000000000000000;
45
46    match hash & EXP_2 {
47        EXP_0 => hash | EXP_1,
48        EXP_2 => hash ^ EXP_1,
49        _ => hash,
50    }
51}
52
53pub fn hash_u64_to_f64(hash: u64) -> f64 {
54    f64::from_bits(hash_63_bits(hash))
55}
56
57impl<T> From<HashOf<T>> for FloatHashOf<T> {
58    fn from(hash: HashOf<T>) -> Self {
59        let mut hash = hash.to_inner();
60        hash = hash_63_bits(hash);
61
62        Self {
63            hash: unsafe { NonZeroU64::new_unchecked(hash) },
64            _marker: PhantomData,
65        }
66    }
67}
68
69// Example types to explain the confusing signature...
70// T: str
71// Q: String
72impl<T: Hash + ?Sized, Q: Borrow<T>> From<&T> for FloatHashOf<Q> {
73    fn from(value: &T) -> Self {
74        let hash = HashOf::<Q>::from(value);
75        hash.into()
76    }
77}
78
79#[cfg(test)]
80mod tests {
81    use super::*;
82    use std::collections::HashSet;
83
84    fn test_cases() -> HashSet<u64> {
85        let mut cases = HashSet::new();
86        cases.insert(0);
87        for start in 0..16 {
88            for shift in 0..(0u64.count_zeros()) {
89                let case = start << shift;
90                cases.insert(case);
91            }
92        }
93        cases
94    }
95
96    #[test]
97    fn no_invalid_values() {
98        let cases = test_cases();
99        for &case in cases.iter() {
100            let result = hash_u64_to_f64(case);
101            assert!(result != 0.);
102            assert!(!result.is_nan());
103        }
104    }
105}