simd_r_drive/storage_engine/digest/
compute_hash.rs

1use xxhash_rust::xxh3::xxh3_64;
2
3/// Computes a 64-bit hash for the given key using XXH3.
4///
5/// XXH3 is a high-performance, non-cryptographic hash function optimized for speed
6/// and efficiency. It leverages SIMD (Single Instruction, Multiple Data) and
7/// hardware acceleration when available (e.g., AVX2, NEON) for even faster hashing.
8/// This function provides a fast way to generate a unique identifier for a given
9/// byte slice, making it suitable for key indexing in hash maps.
10///
11/// # Parameters
12/// - `key`: A byte slice representing the key to be hashed.
13///
14/// # Returns
15/// - A `u64` hash value derived from the input key.
16///
17/// #Notes:
18///
19/// Stream writing does not call this directly and instead builds `checksum_state` off
20/// the hasher directly.
21///
22/// See `crate::storage_engine::DataStore::write_stream_with_key_hash` for implementation
23/// details.
24#[inline]
25pub fn compute_hash(key: &[u8]) -> u64 {
26    xxh3_64(key)
27}
28
29/// Computes XXH3 64-bit hashes for a batch of keys **in one call**.
30///
31/// The function walks the `keys` slice only once, feeding each key into
32/// `xxh3_64`.  Internally `xxh3_64` already dispatches to the fastest SIMD
33/// implementation available on the host (AVX2, NEON, …), so you still get the
34/// intrinsic acceleration for *each* key – but with the added benefit that
35/// you can:
36///
37/// * call the hasher exactly **once** from the high-level API,
38/// * pre-allocate the `Vec<u64>` only once,
39/// * hand the resulting `(hash, payload)` tuples straight to
40///   `batch_write_with_key_hashes`, keeping the critical section (the `RwLock`)
41///   as small as possible.
42///
43/// # Parameters
44/// * `keys` – slice of key byte-slices; the `n`-th output hash corresponds to
45///   the `n`-th input key.
46///
47/// # Returns
48/// A `Vec<u64>` whose length equals `keys.len()`, containing the XXH3 hash of
49/// each key.
50///
51/// # Examples
52/// ```
53/// use simd_r_drive::storage_engine::digest::{compute_hash, compute_hash_batch};
54///
55/// let keys: &[&[u8]] = &[b"alice", b"bob", b"carol"];
56/// let hashes = compute_hash_batch(keys);
57///
58/// assert_eq!(hashes.len(), 3);
59/// assert_eq!(hashes[0], compute_hash(b"alice"));
60/// assert_eq!(hashes[1], compute_hash(b"bob"));
61/// assert_eq!(hashes[2], compute_hash(b"carol"));
62/// ```
63#[inline]
64pub fn compute_hash_batch(keys: &[&[u8]]) -> Vec<u64> {
65    // TODO: Look into more efficient approaches that can work on a matrix of
66    // keys without iterating over them.
67
68    // A plain loop beats an iterator here; it lets LLVM unroll/vectorize freely.
69    let mut out = Vec::with_capacity(keys.len());
70
71    // TODO: For a large amount of keys, consider distributing the work with Rayon
72    for k in keys {
73        // xxh3_64 already uses SIMD internally where available.
74        out.push(xxh3_64(k));
75    }
76    out
77}