Skip to main content

nodedb_array/coord/
string_hash.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! String-dim hashing helper shared between curve normalization and
4//! tile-index bucketing.
5//!
6//! Both [`super::normalize`] (when projecting `String` coords onto a
7//! space-filling curve) and [`crate::tile::layout`] (when bucketing
8//! `String` coords into tile indices) need the same per-process
9//! mapping from a string to an integer. Centralising it here keeps
10//! their assignments consistent within a process and gives a single
11//! site to swap for sort-preserving codes later.
12//!
13//! `RandomState` is per-process; bucket assignments are NOT stable
14//! across restarts. Future work may upgrade to a deterministic
15//! dictionary code.
16
17use std::hash::{BuildHasher, Hasher};
18
19/// Hash `s` into the range `[0, bound]` (inclusive) by masking with
20/// `bound`. Caller passes `bound = (1 << bits) - 1` for power-of-two
21/// curve buckets, or `extent - 1` style values for tile bucketing
22/// (where `bound + 1` is a power of two); for arbitrary `bound`,
23/// callers should use [`hash_string_modulo`] instead.
24pub fn hash_string_masked(s: &str, bound: u64) -> u64 {
25    let mut h = std::collections::hash_map::RandomState::new().build_hasher();
26    h.write(s.as_bytes());
27    if bound == 0 { 0 } else { h.finish() & bound }
28}
29
30/// Hash `s` into `[0, modulus)` via modulo. Used by tile-index
31/// bucketing where the extent isn't necessarily a power of two.
32pub fn hash_string_modulo(s: &str, modulus: u64) -> u64 {
33    let mut h = std::collections::hash_map::RandomState::new().build_hasher();
34    h.write(s.as_bytes());
35    if modulus <= 1 {
36        0
37    } else {
38        h.finish() % modulus
39    }
40}
41
42#[cfg(test)]
43mod tests {
44    use super::*;
45
46    #[test]
47    fn masked_hash_within_bound() {
48        let bound = 0xFFu64;
49        for i in 0..32 {
50            let s = format!("k{i}");
51            assert!(hash_string_masked(&s, bound) <= bound);
52        }
53    }
54
55    #[test]
56    fn modulo_hash_within_modulus() {
57        let m = 7u64;
58        for i in 0..32 {
59            let s = format!("k{i}");
60            assert!(hash_string_modulo(&s, m) < m);
61        }
62    }
63
64    #[test]
65    fn zero_modulus_returns_zero() {
66        assert_eq!(hash_string_modulo("x", 0), 0);
67        assert_eq!(hash_string_modulo("x", 1), 0);
68    }
69}