nthash_rs/util.rs
1//! Miscellaneous helpers shared across all ntHash variants.
2//!
3//! This module provides two small yet critical building blocks used by
4//! every ntHash implementation (`NtHash`, `SeedNtHash`, `BlindNtHash`, etc.):
5//!
6//! - **`canonical`** — combine forward and reverse‐complement hashes into a
7//! strand‐independent value by wrapping addition.
8//! - **`extend_hashes`** — generate a sequence of "extra" hash values from
9//! one canonical base hash, matching the C++ reference’s multiplicative
10//! mixing and shift scheme.
11//!
12//! These functions are marked `#[inline]` for zero‐overhead calls in hot paths,
13//! and the code is dependency‐free (only `core`/`std`), so it can be used
14//! in no‐std contexts if needed.
15
16use crate::constants::{MULTISEED, MULTISHIFT};
17
18/// Combine forward and reverse‐complement strand hashes into a single
19/// *canonical* k‑mer hash (strand‐independent).
20///
21/// The original ntHash definition simply **adds** the two 64‑bit words with
22/// wrapping arithmetic to remain well‐defined on overflow.
23///
24/// # Examples
25///
26/// ```
27/// # use nthash_rs::util::canonical;
28/// let fwd = 0xFFFF_FFFF_FFFF_FFFF;
29/// let rev = 1;
30/// // wraps around to 0
31/// assert_eq!(canonical(fwd, rev), 0);
32/// ```
33#[inline(always)]
34pub const fn canonical(fwd: u64, rev: u64) -> u64 {
35 fwd.wrapping_add(rev)
36}
37
38/// Expand a single canonical hash into a user‐provided slice of additional
39/// hash values.
40///
41/// This implements the same scheme as the C++ ntHash reference:
42/// each extra hash `h_i` (for `i ≥ 1`) is computed as:
43///
44/// ```text
45/// mix = (i as u64) ^ (k as u64 * MULTISEED)
46/// h_i = base.wrapping_mul(mix)
47/// h_i ^= h_i >> MULTISHIFT
48/// ```
49///
50/// - `fwd`, `rev` — forward and reverse‐complement strand hashes.
51/// - `k` — k‑mer span or seed weight, used in the mixing step.
52/// - `hashes` — output slice; the length determines how many values
53/// (including the canonical hash at index 0) are generated.
54///
55/// If `hashes` is empty, this function returns immediately, avoiding any
56/// unnecessary branching in callers.
57///
58/// # Examples
59///
60/// ```
61/// # use nthash_rs::util::extend_hashes;
62/// let mut out = [0u64; 4];
63/// extend_hashes(0x1234, 0x5678, 5, &mut out);
64/// assert_eq!(out[0], 0x1234u64.wrapping_add(0x5678));
65/// // subsequent elements are nonzero mixes
66/// assert!(out[1] != out[0]);
67/// ```
68#[inline]
69pub fn extend_hashes(fwd: u64, rev: u64, k: u32, hashes: &mut [u64]) {
70 match hashes.len() {
71 0 => return,
72 1 => {
73 hashes[0] = canonical(fwd, rev);
74 return;
75 }
76 _ => {}
77 }
78
79 // Base (canonical) hash at index 0
80 let base = canonical(fwd, rev);
81 hashes[0] = base;
82
83 let seed = (k as u64).wrapping_mul(MULTISEED);
84
85 // Compute extra hashes for i = 1 .. len−1
86 for (i, slot) in hashes.iter_mut().enumerate().skip(1) {
87 let mut h = base.wrapping_mul((i as u64) ^ seed);
88 h ^= h >> MULTISHIFT;
89 *slot = h;
90 }
91}
92
93#[cfg(test)]
94mod tests {
95 use super::*;
96
97 #[test]
98 fn canonical_wraps_on_overflow() {
99 let max = u64::MAX;
100 assert_eq!(canonical(max, 1), 0);
101 }
102
103 #[test]
104 fn extend_zero_length_slice() {
105 let mut out: [u64; 0] = [];
106 extend_hashes(123, 456, 7, &mut out);
107 // no panic, no change
108 }
109
110 #[test]
111 fn extend_matches_cpp_reference() {
112 const F: u64 = 0x1234_5678_9ABC_DEF0;
113 const R: u64 = 0x0FED_CBA9_8765_4321;
114 const K: u32 = 21;
115 let mut v = [0u64; 8];
116 extend_hashes(F, R, K, &mut v);
117 let base = F.wrapping_add(R);
118 for i in 0..v.len() {
119 let expected = if i == 0 {
120 base
121 } else {
122 let mut t = base.wrapping_mul((i as u64) ^ (K as u64).wrapping_mul(MULTISEED));
123 t ^= t >> MULTISHIFT;
124 t
125 };
126 assert_eq!(v[i], expected);
127 }
128 }
129}