Skip to main content

hashkit/
lib.rs

1#![warn(missing_docs)]
2#![forbid(unsafe_code)]
3//! Unified non-cryptographic hash functions for performance-sensitive crates.
4//!
5//! The crate exposes three small, dependency-free building blocks:
6//! - [`fnv`] for stable FNV-1a hashing, including the flashsieve-compatible
7//!   two-byte fast path.
8//! - [`splitmix`] for high-quality seed finalization and compact pair hashing.
9//! - [`wyhash`] for fast bulk hashing of arbitrary byte slices.
10//! - [`blake3_hash`] for stable, content-addressed streaming hashing using BLAKE3.
11//!
12//! # Examples
13//!
14//! ```
15//! use hashkit::{bloom_hash_pair, hash_to_index};
16//!
17//! let (h1, h2) = bloom_hash_pair(b'a', b'b');
18//! let slot = hash_to_index(h1 ^ h2, 1024);
19//!
20//! assert!(slot < 1024);
21//! ```
22
23pub mod blake3_hash;
24pub mod fnv;
25pub mod splitmix;
26pub mod wyhash;
27
28/// Returns the two hash functions used for double-hashed bloom filter probes.
29///
30/// The first element is the flashsieve-compatible FNV-1a hash and the second is
31/// the SplitMix64-derived hash.
32///
33/// # Examples
34///
35/// ```
36/// let (first, second) = hashkit::bloom_hash_pair(b'x', b'y');
37///
38/// assert_ne!(first, 0);
39/// assert_ne!(second, 0);
40/// ```
41#[inline]
42#[must_use]
43pub fn bloom_hash_pair(a: u8, b: u8) -> (u64, u64) {
44    (fnv::fnv1a_pair(a, b), splitmix::pair(a, b))
45}
46
47/// Reduces a hash into a bit index.
48///
49/// This uses fast power-of-two masking when `num_bits` is a power of two,
50/// and falls back to a modulo operation for other values. If `num_bits` is
51/// zero, it safely returns zero to avoid division-by-zero panics.
52///
53/// # Examples
54///
55/// ```
56/// let index = hashkit::hash_to_index(0xDEAD_BEEF, 256);
57///
58/// assert!(index < 256);
59/// ```
60#[inline]
61#[must_use]
62pub fn hash_to_index(hash: u64, num_bits: usize) -> usize {
63    if num_bits == 0 {
64        return 0;
65    }
66    #[allow(clippy::cast_possible_truncation)]
67    if num_bits.is_power_of_two() {
68        (hash & ((num_bits as u64).wrapping_sub(1))) as usize
69    } else {
70        (hash % (num_bits as u64)) as usize
71    }
72}
73
74#[cfg(test)]
75mod tests {
76    use super::{bloom_hash_pair, fnv, hash_to_index, splitmix, wyhash};
77
78    const SAMPLE_SEED: u64 = 0x0123_4567_89AB_CDEF;
79
80    #[test]
81    fn bloom_hash_pair_matches_components() {
82        let pair = bloom_hash_pair(b'a', b'b');
83        assert_eq!(pair.0, fnv::fnv1a_pair(b'a', b'b'));
84        assert_eq!(pair.1, splitmix::pair(b'a', b'b'));
85    }
86
87    #[test]
88    fn bloom_hash_pair_is_non_zero_for_common_pair() {
89        let (first, second) = bloom_hash_pair(b'n', b'g');
90        assert_ne!(first, 0);
91        assert_ne!(second, 0);
92    }
93
94    #[test]
95    fn hash_to_index_zero_hash_maps_to_zero() {
96        assert_eq!(hash_to_index(0, 64), 0);
97    }
98
99    #[test]
100    fn hash_to_index_masks_large_hashes() {
101        assert_eq!(hash_to_index(u64::MAX, 1024), 1023);
102    }
103
104    #[test]
105    fn hash_to_index_stays_in_bounds() {
106        for bits in [2_usize, 4, 8, 64, 256, 4096] {
107            for hash in [0_u64, 1, 7, 31, 255, u64::MAX, SAMPLE_SEED] {
108                assert!(hash_to_index(hash, bits) < bits);
109            }
110        }
111    }
112
113    #[test]
114    fn hash_to_index_handles_non_power_of_two() {
115        assert_eq!(hash_to_index(7, 10), 7);
116        assert_eq!(hash_to_index(13, 10), 3);
117        assert_eq!(hash_to_index(0, 10), 0);
118    }
119
120    #[test]
121    fn hash_to_index_handles_zero() {
122        assert_eq!(hash_to_index(7, 0), 0);
123    }
124
125    #[test]
126    fn wyhash_changes_when_seed_changes() {
127        assert_ne!(wyhash::hash(b"abc", 1), wyhash::hash(b"abc", 2));
128    }
129
130    #[test]
131    fn wyhash_changes_when_input_changes() {
132        assert_ne!(
133            wyhash::hash(b"abc", SAMPLE_SEED),
134            wyhash::hash(b"abd", SAMPLE_SEED)
135        );
136    }
137}