rustpython_common/
hash.rs

1use malachite_bigint::BigInt;
2use num_traits::ToPrimitive;
3use siphasher::sip::SipHasher24;
4use std::hash::{BuildHasher, Hash, Hasher};
5
6pub type PyHash = i64;
7pub type PyUHash = u64;
8
9/// A PyHash value used to represent a missing hash value, e.g. means "not yet computed" for
10/// `str`'s hash cache
11pub const SENTINEL: PyHash = -1;
12
13/// Prime multiplier used in string and various other hashes.
14pub const MULTIPLIER: PyHash = 1_000_003; // 0xf4243
15/// Numeric hashes are based on reduction modulo the prime 2**_BITS - 1
16pub const BITS: usize = 61;
17pub const MODULUS: PyUHash = (1 << BITS) - 1;
18pub const INF: PyHash = 314_159;
19pub const NAN: PyHash = 0;
20pub const IMAG: PyHash = MULTIPLIER;
21pub const ALGO: &str = "siphash24";
22pub const HASH_BITS: usize = std::mem::size_of::<PyHash>() * 8;
23// SipHasher24 takes 2 u64s as a seed
24pub const SEED_BITS: usize = std::mem::size_of::<u64>() * 2 * 8;
25
26// pub const CUTOFF: usize = 7;
27
28pub struct HashSecret {
29    k0: u64,
30    k1: u64,
31}
32
33impl BuildHasher for HashSecret {
34    type Hasher = SipHasher24;
35    fn build_hasher(&self) -> Self::Hasher {
36        SipHasher24::new_with_keys(self.k0, self.k1)
37    }
38}
39
40impl rand::distributions::Distribution<HashSecret> for rand::distributions::Standard {
41    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> HashSecret {
42        HashSecret {
43            k0: rng.gen(),
44            k1: rng.gen(),
45        }
46    }
47}
48
49impl HashSecret {
50    pub fn new(seed: u32) -> Self {
51        let mut buf = [0u8; 16];
52        lcg_urandom(seed, &mut buf);
53        let (left, right) = buf.split_at(8);
54        let k0 = u64::from_le_bytes(left.try_into().unwrap());
55        let k1 = u64::from_le_bytes(right.try_into().unwrap());
56        Self { k0, k1 }
57    }
58}
59
60impl HashSecret {
61    pub fn hash_value<T: Hash + ?Sized>(&self, data: &T) -> PyHash {
62        fix_sentinel(mod_int(self.hash_one(data) as _))
63    }
64
65    pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hashf: F) -> Result<PyHash, E>
66    where
67        I: IntoIterator<Item = &'a T>,
68        F: Fn(&'a T) -> Result<PyHash, E>,
69    {
70        let mut hasher = self.build_hasher();
71        for element in iter {
72            let item_hash = hashf(element)?;
73            item_hash.hash(&mut hasher);
74        }
75        Ok(fix_sentinel(mod_int(hasher.finish() as PyHash)))
76    }
77
78    pub fn hash_bytes(&self, value: &[u8]) -> PyHash {
79        if value.is_empty() {
80            0
81        } else {
82            self.hash_value(value)
83        }
84    }
85
86    pub fn hash_str(&self, value: &str) -> PyHash {
87        self.hash_bytes(value.as_bytes())
88    }
89}
90
91#[inline]
92pub fn hash_pointer(value: usize) -> PyHash {
93    // TODO: 32bit?
94    let hash = (value >> 4) | value;
95    hash as _
96}
97
98#[inline]
99pub fn hash_float(value: f64) -> Option<PyHash> {
100    // cpython _Py_HashDouble
101    if !value.is_finite() {
102        return if value.is_infinite() {
103            Some(if value > 0.0 { INF } else { -INF })
104        } else {
105            None
106        };
107    }
108
109    let frexp = super::float_ops::ufrexp(value);
110
111    // process 28 bits at a time;  this should work well both for binary
112    // and hexadecimal floating point.
113    let mut m = frexp.0;
114    let mut e = frexp.1;
115    let mut x: PyUHash = 0;
116    while m != 0.0 {
117        x = ((x << 28) & MODULUS) | x >> (BITS - 28);
118        m *= 268_435_456.0; // 2**28
119        e -= 28;
120        let y = m as PyUHash; // pull out integer part
121        m -= y as f64;
122        x += y;
123        if x >= MODULUS {
124            x -= MODULUS;
125        }
126    }
127
128    // adjust for the exponent;  first reduce it modulo BITS
129    const BITS32: i32 = BITS as i32;
130    e = if e >= 0 {
131        e % BITS32
132    } else {
133        BITS32 - 1 - ((-1 - e) % BITS32)
134    };
135    x = ((x << e) & MODULUS) | x >> (BITS32 - e);
136
137    Some(fix_sentinel(x as PyHash * value.signum() as PyHash))
138}
139
140pub fn hash_bigint(value: &BigInt) -> PyHash {
141    let ret = match value.to_i64() {
142        Some(i) => mod_int(i),
143        None => (value % MODULUS).to_i64().unwrap_or_else(|| unsafe {
144            // SAFETY: MODULUS < i64::MAX, so value % MODULUS is guaranteed to be in the range of i64
145            std::hint::unreachable_unchecked()
146        }),
147    };
148    fix_sentinel(ret)
149}
150
151#[inline(always)]
152pub fn fix_sentinel(x: PyHash) -> PyHash {
153    if x == SENTINEL {
154        -2
155    } else {
156        x
157    }
158}
159
160#[inline]
161pub fn mod_int(value: i64) -> PyHash {
162    value % MODULUS as i64
163}
164
165pub fn lcg_urandom(mut x: u32, buf: &mut [u8]) {
166    for b in buf {
167        x = x.wrapping_mul(214013);
168        x = x.wrapping_add(2531011);
169        *b = ((x >> 16) & 0xff) as u8;
170    }
171}
172
173#[inline]
174pub fn hash_object_id_raw(p: usize) -> PyHash {
175    // TODO: Use commented logic when below issue resolved.
176    // Ref: https://github.com/RustPython/RustPython/pull/3951#issuecomment-1193108966
177
178    /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
179    excessive hash collisions for dicts and sets */
180    // p.rotate_right(4) as PyHash
181    p as PyHash
182}
183
184#[inline]
185pub fn hash_object_id(p: usize) -> PyHash {
186    fix_sentinel(hash_object_id_raw(p))
187}
188
189pub fn keyed_hash(key: u64, buf: &[u8]) -> u64 {
190    let mut hasher = SipHasher24::new_with_keys(key, 0);
191    buf.hash(&mut hasher);
192    hasher.finish()
193}