Skip to main content

rustpython_common/
hash.rs

1use core::hash::{BuildHasher, Hash, Hasher};
2use malachite_bigint::BigInt;
3use num_traits::ToPrimitive;
4use siphasher::sip::SipHasher24;
5
6pub type PyHash = i64;
7pub type PyUHash = u64;
8
9/// A PyHash value used to represent a missing hash value, e.g. means "not yet computed" for
10/// `str`'s hash cache
11pub const SENTINEL: PyHash = -1;
12
13/// Prime multiplier used in string and various other hashes.
14pub const MULTIPLIER: PyHash = 1_000_003; // 0xf4243
15/// Numeric hashes are based on reduction modulo the prime 2**_BITS - 1
16pub const BITS: usize = 61;
17pub const MODULUS: PyUHash = (1 << BITS) - 1;
18pub const INF: PyHash = 314_159;
19pub const NAN: PyHash = 0;
20pub const IMAG: PyHash = MULTIPLIER;
21pub const ALGO: &str = "siphash24";
22pub const HASH_BITS: usize = core::mem::size_of::<PyHash>() * 8;
23// SipHasher24 takes 2 u64s as a seed
24pub const SEED_BITS: usize = core::mem::size_of::<u64>() * 2 * 8;
25
26// pub const CUTOFF: usize = 7;
27
28#[derive(Clone, Copy)]
29pub struct HashSecret {
30    k0: u64,
31    k1: u64,
32}
33
34impl BuildHasher for HashSecret {
35    type Hasher = SipHasher24;
36
37    fn build_hasher(&self) -> Self::Hasher {
38        SipHasher24::new_with_keys(self.k0, self.k1)
39    }
40}
41
42impl HashSecret {
43    pub fn new(seed: u32) -> Self {
44        let mut buf = [0u8; 16];
45        lcg_urandom(seed, &mut buf);
46        let (left, right) = buf.split_at(8);
47        let k0 = u64::from_le_bytes(left.try_into().unwrap());
48        let k1 = u64::from_le_bytes(right.try_into().unwrap());
49        Self { k0, k1 }
50    }
51}
52
53impl HashSecret {
54    pub fn hash_value<T: Hash + ?Sized>(&self, data: &T) -> PyHash {
55        fix_sentinel(mod_int(self.hash_one(data) as _))
56    }
57
58    pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hash_func: F) -> Result<PyHash, E>
59    where
60        I: IntoIterator<Item = &'a T>,
61        F: Fn(&'a T) -> Result<PyHash, E>,
62    {
63        let mut hasher = self.build_hasher();
64        for element in iter {
65            let item_hash = hash_func(element)?;
66            item_hash.hash(&mut hasher);
67        }
68        Ok(fix_sentinel(mod_int(hasher.finish() as PyHash)))
69    }
70
71    pub fn hash_bytes(&self, value: &[u8]) -> PyHash {
72        if value.is_empty() {
73            0
74        } else {
75            self.hash_value(value)
76        }
77    }
78
79    pub fn hash_str(&self, value: &str) -> PyHash {
80        self.hash_bytes(value.as_bytes())
81    }
82}
83
84#[inline]
85pub const fn hash_pointer(value: usize) -> PyHash {
86    // TODO: 32bit?
87    let hash = (value >> 4) | value;
88    hash as _
89}
90
91#[inline]
92pub fn hash_float(value: f64) -> Option<PyHash> {
93    // cpython _Py_HashDouble
94    if !value.is_finite() {
95        return if value.is_infinite() {
96            Some(if value > 0.0 { INF } else { -INF })
97        } else {
98            None
99        };
100    }
101
102    let frexp = super::float_ops::decompose_float(value);
103
104    // process 28 bits at a time;  this should work well both for binary
105    // and hexadecimal floating point.
106    let mut m = frexp.0;
107    let mut e = frexp.1;
108    let mut x: PyUHash = 0;
109    while m != 0.0 {
110        x = ((x << 28) & MODULUS) | (x >> (BITS - 28));
111        m *= 268_435_456.0; // 2**28
112        e -= 28;
113        let y = m as PyUHash; // pull out integer part
114        m -= y as f64;
115        x += y;
116        if x >= MODULUS {
117            x -= MODULUS;
118        }
119    }
120
121    // adjust for the exponent;  first reduce it modulo BITS
122    const BITS32: i32 = BITS as i32;
123    e = if e >= 0 {
124        e % BITS32
125    } else {
126        BITS32 - 1 - ((-1 - e) % BITS32)
127    };
128    x = ((x << e) & MODULUS) | (x >> (BITS32 - e));
129
130    Some(fix_sentinel(x as PyHash * value.signum() as PyHash))
131}
132
133pub fn hash_bigint(value: &BigInt) -> PyHash {
134    let ret = match value.to_i64() {
135        Some(i) => mod_int(i),
136        None => (value % MODULUS).to_i64().unwrap_or_else(|| unsafe {
137            // SAFETY: MODULUS < i64::MAX, so value % MODULUS is guaranteed to be in the range of i64
138            core::hint::unreachable_unchecked()
139        }),
140    };
141    fix_sentinel(ret)
142}
143
144#[inline]
145pub const fn hash_usize(data: usize) -> PyHash {
146    fix_sentinel(mod_int(data as i64))
147}
148
149#[inline(always)]
150pub const fn fix_sentinel(x: PyHash) -> PyHash {
151    if x == SENTINEL { -2 } else { x }
152}
153
154#[inline]
155pub const fn mod_int(value: i64) -> PyHash {
156    value % MODULUS as i64
157}
158
159pub fn lcg_urandom(mut x: u32, buf: &mut [u8]) {
160    for b in buf {
161        x = x.wrapping_mul(214013);
162        x = x.wrapping_add(2531011);
163        *b = ((x >> 16) & 0xff) as u8;
164    }
165}
166
167#[inline]
168pub const fn hash_object_id_raw(p: usize) -> PyHash {
169    // TODO: Use commented logic when below issue resolved.
170    // Ref: https://github.com/RustPython/RustPython/pull/3951#issuecomment-1193108966
171
172    /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
173    excessive hash collisions for dicts and sets */
174    // p.rotate_right(4) as PyHash
175    p as PyHash
176}
177
178#[inline]
179pub const fn hash_object_id(p: usize) -> PyHash {
180    fix_sentinel(hash_object_id_raw(p))
181}
182
183pub fn keyed_hash(key: u64, buf: &[u8]) -> u64 {
184    let mut hasher = SipHasher24::new_with_keys(key, 0);
185    buf.hash(&mut hasher);
186    hasher.finish()
187}