reductionml_core/hash.rs
1#[inline(always)]
2pub fn hash_bytes(key: &[u8], seed: u32) -> u32 {
3 // murmurhash3_32(key, seed)
4 twox_hash::xxh3::hash64_with_seed(key, seed as u64) as u32
5}
6
7// fn fmix(mut h: u32) -> u32 {
8// h ^= h >> 16;
9// h = h.wrapping_mul(0x85eb_ca6b);
10// h ^= h >> 13;
11// h = h.wrapping_mul(0xc2b2_ae35);
12// h ^= h >> 16;
13// h
14// }
15
16// #[allow(arithmetic_overflow)]
17// fn murmurhash3_32(key: &[u8], seed: u32) -> u32 {
18// let num_blocks = key.len() / 4;
19// let mut h1 = seed;
20
21// const CONSTANT1: u32 = 0xcc9e_2d51;
22// const CONSTANT2: u32 = 0x1b87_3593;
23
24// // Body
25// for i in 0..num_blocks {
26// let mut b = Cursor::new(&key[(i * 4)..(i * 4) + 4]);
27// let mut current_block = b.read_u32::<LittleEndian>().unwrap();
28
29// current_block = current_block.wrapping_mul(CONSTANT1);
30// current_block = current_block.rotate_left(15);
31// current_block = current_block.wrapping_mul(CONSTANT2);
32
33// h1 ^= current_block;
34// h1 = h1.rotate_left(13);
35// h1 = h1.wrapping_mul(5).wrapping_add(0xe654_6b64);
36// }
37
38// // Tail
39// let mut k1: u32 = 0;
40// let tail = &key[num_blocks * 4..];
41
42// if !tail.is_empty() {
43// if tail.len() >= 3 {
44// k1 ^= u32::from(tail[2]).wrapping_shl(16);
45// }
46
47// if tail.len() >= 2 {
48// k1 ^= u32::from(tail[1]).wrapping_shl(8);
49// }
50
51// k1 ^= u32::from(tail[0]);
52// k1 = k1.wrapping_mul(CONSTANT1);
53// k1 = k1.rotate_left(15);
54// k1 = k1.wrapping_mul(CONSTANT2);
55// h1 ^= k1;
56// }
57
58// // Finalization
59// h1 ^= key.len() as u32;
60// fmix(h1)
61// }
62
/// The 32-bit FNV prime, `2^24 + 2^8 + 0x93` (decimal 16777619).
pub(crate) const FNV_PRIME: u32 = 0x0100_0193;
64
65// // Test truth values calculated using C++ implementation.
66// #[test]
67// fn fmix_tests() {
68// assert_eq!(fmix(0), 0);
69// assert_eq!(fmix(1), 1364076727);
70// assert_eq!(fmix(5), 3423425485);
71// assert_eq!(fmix(2147483647), 4190899880);
72// assert_eq!(fmix(4294967295), 2180083513);
73// }
74
75// #[test]
76// fn hash_tests_zero_seed() {
77// assert_eq!(murmurhash3_32(b"t", 0), 3397902157);
78// assert_eq!(murmurhash3_32(b"te", 0), 3988319771);
79// assert_eq!(murmurhash3_32(b"tes", 0), 196677210);
80// assert_eq!(murmurhash3_32(b"test", 0), 3127628307);
81// assert_eq!(murmurhash3_32(b"tested", 0), 2247989476);
82// assert_eq!(
83// murmurhash3_32(b"8hv20cjwicnsj vw m000'.'.][][]...!!@3", 0),
84// 4212741639
85// );
86// }
87
88// #[test]
89// fn hash_tests_nonzero_seed() {
90// assert_eq!(murmurhash3_32(b"t", 25436347), 960607349);
91// assert_eq!(murmurhash3_32(b"te", 25436347), 2834341637);
92// assert_eq!(murmurhash3_32(b"tes", 25436347), 1163171263);
93// assert_eq!(murmurhash3_32(b"tested", 25436347), 3592599130);
94// assert_eq!(
95// murmurhash3_32(b"8hv20cjwicnsj vw m000'.'.][][]...!!@3", 25436347),
96// 2503360452
97// );
98// }
99
100// #[test]
101// fn hash_feature_tests() {
102// // Hashes calculated using VW CLI
103// assert_eq!(
104// hash_feature(
105// &features::Feature::Simple {
106// namespace: "myNamespace".to_string(),
107// name: "feature".to_string()
108// },
109// 0
110// ),
111// 1717770527
112// );
113// assert_eq!(
114// hash_feature(
115// &features::Feature::Simple {
116// namespace: "a".to_string(),
117// name: "a1".to_string()
118// },
119// 0
120// ),
121// 2579875658
122// );
123// assert_eq!(
124// hash_feature(
125// &features::Feature::SimpleWithStringValue {
126// namespace: "myNamespace".to_string(),
127// name: "feature".to_string(),
128// value: "value".to_string()
129// },
130// 0
131// ),
132// 3812705603
133// );
134// assert_eq!(
135// hash_feature(
136// &features::Feature::Anonymous {
137// namespace: "anon".to_string(),
138// offset: 0
139// },
140// 0
141// ),
142// 659962185
143// );
144// assert_eq!(
145// hash_feature(
146// &features::Feature::Anonymous {
147// namespace: "anon".to_string(),
148// offset: 1
149// },
150// 0
151// ),
152// 659962186
153// );
154// }
155
156// #[test]
157// fn hash_feature_with_bit_mask_tests() {
158// assert_eq!(
159// mask_hash(
160// hash_feature(
161// &features::Feature::Simple {
162// namespace: "myNamespace".to_string(),
163// name: "feature".to_string()
164// },
165// 0
166// ),
167// bit_mask(18)
168// ),
169// 203039
170// );
171// assert_eq!(
172// mask_hash(
173// hash_feature(
174// &features::Feature::Simple {
175// namespace: "myNamespace".to_string(),
176// name: "feature".to_string()
177// },
178// 0
179// ),
180// bit_mask(5)
181// ),
182// 31
183// );
184
185// assert_eq!(
186// mask_hash(
187// hash_feature(
188// &features::Feature::SimpleWithStringValue {
189// namespace: "myNamespace".to_string(),
190// name: "feature".to_string(),
191// value: "value".to_string()
192// },
193// 0
194// ),
195// bit_mask(18)
196// ),
197// 83267
198// );
199// assert_eq!(
200// mask_hash(
201// hash_feature(
202// &features::Feature::SimpleWithStringValue {
203// namespace: "myNamespace".to_string(),
204// name: "feature".to_string(),
205// value: "value".to_string()
206// },
207// 0
208// ),
209// bit_mask(5)
210// ),
211// 3
212// );
213
214// assert_eq!(
215// mask_hash(
216// hash_feature(
217// &features::Feature::Anonymous {
218// namespace: "anon".to_string(),
219// offset: 0
220// },
221// 0
222// ),
223// bit_mask(18)
224// ),
225// 145737
226// );
227// assert_eq!(
228// mask_hash(
229// hash_feature(
230// &features::Feature::Anonymous {
231// namespace: "anon".to_string(),
232// offset: 1
233// },
234// 0
235// ),
236// bit_mask(18)
237// ),
238// 145738
239// );
240
241// assert_eq!(
242// mask_hash(
243// hash_feature(
244// &features::Feature::Anonymous {
245// namespace: "anon".to_string(),
246// offset: 0
247// },
248// 0
249// ),
250// bit_mask(5)
251// ),
252// 9
253// );
254// assert_eq!(
255// mask_hash(
256// hash_feature(
257// &features::Feature::Anonymous {
258// namespace: "anon".to_string(),
259// offset: 1
260// },
261// 0
262// ),
263// bit_mask(5)
264// ),
265// 10
266// );
267// }
268
269// #[test]
270// fn hash_interactions() {
271// assert_eq!(
272// hash_feature(
273// &features::Feature::Interacted {
274// terms: vec![
275// features::Feature::Simple {
276// namespace: "a".to_string(),
277// name: "a1".to_string()
278// },
279// features::Feature::Simple {
280// namespace: "b".to_string(),
281// name: "b1".to_string()
282// }
283// ]
284// },
285// 0
286// ),
287// 1046402606
288// );
289// }