Skip to main content

compact_dict/
ahash.rs

1const MULTIPLE: u64 = 6364136223846793005;
2const ROT: u32 = 23;
3
4#[inline]
5fn folded_multiply(s: u64, by: u64) -> u64 {
6    let b1 = s.wrapping_mul(by.swap_bytes());
7    let b2 = s.swap_bytes().wrapping_mul(!by);
8    b1 ^ b2.swap_bytes()
9}
10
11#[derive(Clone, Copy)]
12struct U128(u64, u64);
13
14#[inline]
15fn read_small(data: &[u8]) -> U128 {
16    let len = data.len();
17    if len >= 2 {
18        if len >= 4 {
19            // len 4–8: (u32 at start, u32 at end)
20            let a = u32::from_le_bytes(data[0..4].try_into().unwrap()) as u64;
21            let b = u32::from_le_bytes(data[len - 4..len].try_into().unwrap()) as u64;
22            U128(a, b)
23        } else {
24            // len 2–3: (u16 at start, last byte)
25            let a = u16::from_le_bytes([data[0], data[1]]) as u64;
26            let b = data[len - 1] as u64;
27            U128(a, b)
28        }
29    } else {
30        if len > 0 {
31            // len 1: (byte, byte)
32            let a = data[0] as u64;
33            U128(a, a)
34        } else {
35            // len 0
36            U128(0, 0)
37        }
38    }
39}
40
41struct MojoAHasher {
42    buffer: u64,
43    pad: u64,
44    extra_keys: U128,
45}
46
47impl MojoAHasher {
48    fn new(key: [u64; 4]) -> Self {
49        let pi_key = [
50            key[0] ^ 0x243f_6a88_85a3_08d3,
51            key[1] ^ 0x1319_8a2e_0370_7344,
52            key[2] ^ 0xa409_3822_299f_31d0,
53            key[3] ^ 0x082e_fa98_ec4e_6c89,
54        ];
55        Self {
56            buffer: pi_key[0],
57            pad: pi_key[1],
58            extra_keys: U128(pi_key[2], pi_key[3]),
59        }
60    }
61
62    #[inline]
63    fn large_update(&mut self, new_data: U128) {
64        let combined = folded_multiply(
65            new_data.0 ^ self.extra_keys.0,
66            new_data.1 ^ self.extra_keys.1,
67        );
68        // rotate_bits_left[ROT]((buffer + pad) ^ combined)
69        self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT);
70    }
71
72    #[inline]
73    fn finish(&self) -> u64 {
74        let rot = self.buffer & 63;
75        let folded = folded_multiply(self.buffer, self.pad);
76        folded.rotate_left(rot as u32)
77    }
78
79    #[inline]
80    fn write(&mut self, data: &[u8]) {
81        // self.buffer = (self.buffer + length) * MULTIPLE
82        self.buffer = self
83            .buffer
84            .wrapping_add(data.len() as u64)
85            .wrapping_mul(MULTIPLE);
86
87        let len = data.len();
88        if len > 8 {
89            if len > 16 {
90                // tail = last 16 bytes
91                let tail_a = u64::from_le_bytes(data[len - 16..len - 8].try_into().unwrap());
92                let tail_b = u64::from_le_bytes(data[len - 8..len].try_into().unwrap());
93                self.large_update(U128(tail_a, tail_b));
94
95                // process 16-byte blocks from start
96                let mut offset = 0usize;
97                while len - offset > 16 {
98                    let a = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap());
99                    let b = u64::from_le_bytes(data[offset + 8..offset + 16].try_into().unwrap());
100                    self.large_update(U128(a, b));
101                    offset += 16;
102                }
103            } else {
104                // len 9–16: first 8 + last 8
105                let a = u64::from_le_bytes(data[0..8].try_into().unwrap());
106                let b = u64::from_le_bytes(data[len - 8..len].try_into().unwrap());
107                self.large_update(U128(a, b));
108            }
109        } else {
110            // Mojo write() does large_update(read_small) for <= 8,
111            // but your top-level ahash() short path bypasses write().
112            let U128(a, b) = read_small(data);
113            self.large_update(U128(a, b));
114        }
115    }
116}
117
118pub fn ahash(s: &str) -> u64 {
119    let mut hasher = MojoAHasher::new([0, 0, 0, 0]);
120    let data = s.as_bytes();
121
122    //     println!("data.len(): {}, buff: {}", data.len(), hasher.buffer);
123
124    if data.len() > 8 {
125        hasher.write(data);
126    } else {
127        // EXACT Mojo short path:
128        let U128(a, b) = read_small(data);
129        hasher.buffer = folded_multiply(a ^ hasher.buffer, b ^ hasher.extra_keys.1);
130        hasher.pad = hasher.pad.wrapping_add(data.len() as u64);
131    }
132
133    //     println!("buff {} pad {}", hasher.buffer, hasher.pad);
134    hasher.finish()
135}
136
137pub trait StrHash {
138    fn hash(&self, s: &str) -> u64;
139}
140
141impl<H: std::hash::BuildHasher> StrHash for H {
142    #[inline]
143    fn hash(&self, s: &str) -> u64 {
144        let mut state = self.build_hasher();
145        std::hash::Hasher::write(&mut state, s.as_bytes());
146        std::hash::Hasher::finish(&state)
147    }
148}
149
150#[cfg_attr(feature = "rkyv", derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize))]
151#[derive(Default)]
152pub struct MojoAHashStrHash;
153
154impl StrHash for MojoAHashStrHash {
155    #[inline]
156    fn hash(&self, s: &str) -> u64 {
157        ahash(s) // call your custom ahash function
158    }
159}
160
161use rustc_hash::FxHasher;
162use ahash::AHasher;
163
164#[cfg_attr(feature = "rkyv", derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize))]
165#[derive(Default)]
166pub struct FxStrHash;
167
168impl StrHash for FxStrHash {
169    #[inline]
170    fn hash(&self, s: &str) -> u64 {
171        let mut hasher = FxHasher::default();
172        std::hash::Hasher::write(&mut hasher, s.as_bytes());
173        std::hash::Hasher::finish(&hasher)
174    }
175}
176
177#[cfg_attr(feature = "rkyv", derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize))]
178#[derive(Default)]
179pub struct AHashStrHash;
180
181impl StrHash for AHashStrHash {
182    #[inline]
183    fn hash(&self, s: &str) -> u64 {
184        let mut hasher = AHasher::default();
185        std::hash::Hasher::write(&mut hasher, s.as_bytes());
186        std::hash::Hasher::finish(&hasher)
187    }
188}