nodedb_array/coord/string_hash.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! String-dim hashing helper shared between curve normalization and
4//! tile-index bucketing.
5//!
6//! Both [`super::normalize`] (when projecting `String` coords onto a
7//! space-filling curve) and [`crate::tile::layout`] (when bucketing
8//! `String` coords into tile indices) need the same per-process
9//! mapping from a string to an integer. Centralising it here keeps
10//! their assignments consistent within a process and gives a single
11//! site to swap for sort-preserving codes later.
12//!
13//! `RandomState` is per-process; bucket assignments are NOT stable
14//! across restarts. Future work may upgrade to a deterministic
15//! dictionary code.
16
17use std::hash::{BuildHasher, Hasher};
18
19/// Hash `s` into the range `[0, bound]` (inclusive) by masking with
20/// `bound`. Caller passes `bound = (1 << bits) - 1` for power-of-two
21/// curve buckets, or `extent - 1` style values for tile bucketing
22/// (where `bound + 1` is a power of two); for arbitrary `bound`,
23/// callers should use [`hash_string_modulo`] instead.
24pub fn hash_string_masked(s: &str, bound: u64) -> u64 {
25 let mut h = std::collections::hash_map::RandomState::new().build_hasher();
26 h.write(s.as_bytes());
27 if bound == 0 { 0 } else { h.finish() & bound }
28}
29
30/// Hash `s` into `[0, modulus)` via modulo. Used by tile-index
31/// bucketing where the extent isn't necessarily a power of two.
32pub fn hash_string_modulo(s: &str, modulus: u64) -> u64 {
33 let mut h = std::collections::hash_map::RandomState::new().build_hasher();
34 h.write(s.as_bytes());
35 if modulus <= 1 {
36 0
37 } else {
38 h.finish() % modulus
39 }
40}
41
42#[cfg(test)]
43mod tests {
44 use super::*;
45
46 #[test]
47 fn masked_hash_within_bound() {
48 let bound = 0xFFu64;
49 for i in 0..32 {
50 let s = format!("k{i}");
51 assert!(hash_string_masked(&s, bound) <= bound);
52 }
53 }
54
55 #[test]
56 fn modulo_hash_within_modulus() {
57 let m = 7u64;
58 for i in 0..32 {
59 let s = format!("k{i}");
60 assert!(hash_string_modulo(&s, m) < m);
61 }
62 }
63
64 #[test]
65 fn zero_modulus_returns_zero() {
66 assert_eq!(hash_string_modulo("x", 0), 0);
67 assert_eq!(hash_string_modulo("x", 1), 0);
68 }
69}