1use core::{
4 fmt,
5 hash::{self, BuildHasher},
6 mem,
7};
8
9use crate::{IntoU32, IntoU64};
10
11const PRIME32_1: u32 = 0x9E3779B1;
13const PRIME32_2: u32 = 0x85EBCA77;
14const PRIME32_3: u32 = 0xC2B2AE3D;
15const PRIME32_4: u32 = 0x27D4EB2F;
16const PRIME32_5: u32 = 0x165667B1;
17
18type Lane = u32;
19type Lanes = [Lane; 4];
20type Bytes = [u8; 16];
21
22const BYTES_IN_LANE: usize = mem::size_of::<Bytes>();
23
24#[derive(Clone, PartialEq)]
25struct BufferData(Lanes);
26
27impl BufferData {
28 const fn new() -> Self {
29 Self([0; 4])
30 }
31
32 const fn bytes(&self) -> &Bytes {
33 const _: () = assert!(mem::align_of::<u8>() <= mem::align_of::<Lane>());
34 unsafe { &*self.0.as_ptr().cast() }
37 }
38
39 fn bytes_mut(&mut self) -> &mut Bytes {
40 unsafe { &mut *self.0.as_mut_ptr().cast() }
42 }
43}
44
45impl fmt::Debug for BufferData {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 f.debug_list().entries(self.0.iter()).finish()
48 }
49}
50
51#[derive(Debug, Clone, PartialEq)]
52struct Buffer {
53 offset: usize,
54 data: BufferData,
55}
56
57impl Buffer {
58 const fn new() -> Self {
59 Self {
60 offset: 0,
61 data: BufferData::new(),
62 }
63 }
64
65 #[inline]
67 fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) {
68 if self.offset == 0 {
78 return (None, data);
79 };
80
81 let bytes = self.data.bytes_mut();
82 debug_assert!(self.offset <= bytes.len());
83
84 let empty = &mut bytes[self.offset..];
85 let n_to_copy = usize::min(empty.len(), data.len());
86
87 let dst = &mut empty[..n_to_copy];
88
89 let (src, rest) = data.split_at(n_to_copy);
90
91 dst.copy_from_slice(src);
92 self.offset += n_to_copy;
93
94 debug_assert!(self.offset <= bytes.len());
95
96 if self.offset == bytes.len() {
97 self.offset = 0;
98 (Some(&self.data.0), rest)
99 } else {
100 (None, rest)
101 }
102 }
103
104 #[inline]
106 fn set(&mut self, data: &[u8]) {
107 if data.is_empty() {
108 return;
109 }
110
111 debug_assert_eq!(self.offset, 0);
112
113 let n_to_copy = data.len();
114
115 let bytes = self.data.bytes_mut();
116 debug_assert!(n_to_copy < bytes.len());
117
118 bytes[..n_to_copy].copy_from_slice(data);
119 self.offset = data.len();
120 }
121
122 #[inline]
124 fn remaining(&self) -> &[u8] {
125 &self.data.bytes()[..self.offset]
126 }
127}
128
129#[derive(Clone, PartialEq)]
130struct Accumulators(Lanes);
131
132impl Accumulators {
133 const fn new(seed: u32) -> Self {
134 Self([
135 seed.wrapping_add(PRIME32_1).wrapping_add(PRIME32_2),
136 seed.wrapping_add(PRIME32_2),
137 seed,
138 seed.wrapping_sub(PRIME32_1),
139 ])
140 }
141
142 #[inline]
144 fn write(&mut self, lanes: Lanes) {
145 let [acc1, acc2, acc3, acc4] = &mut self.0;
146 let [lane1, lane2, lane3, lane4] = lanes;
147
148 *acc1 = round(*acc1, lane1.to_le());
149 *acc2 = round(*acc2, lane2.to_le());
150 *acc3 = round(*acc3, lane3.to_le());
151 *acc4 = round(*acc4, lane4.to_le());
152 }
153
154 #[inline]
156 fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] {
157 while let Some((chunk, rest)) = data.split_first_chunk::<BYTES_IN_LANE>() {
158 let lanes = unsafe { chunk.as_ptr().cast::<Lanes>().read_unaligned() };
161 self.write(lanes);
162 data = rest;
163 }
164 data
165 }
166
167 #[inline]
169 const fn finish(&self) -> u32 {
170 let [acc1, acc2, acc3, acc4] = self.0;
171
172 let acc1 = acc1.rotate_left(1);
173 let acc2 = acc2.rotate_left(7);
174 let acc3 = acc3.rotate_left(12);
175 let acc4 = acc4.rotate_left(18);
176
177 acc1.wrapping_add(acc2)
178 .wrapping_add(acc3)
179 .wrapping_add(acc4)
180 }
181}
182
183impl fmt::Debug for Accumulators {
184 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
185 let [acc1, acc2, acc3, acc4] = self.0;
186 f.debug_struct("Accumulators")
187 .field("acc1", &acc1)
188 .field("acc2", &acc2)
189 .field("acc3", &acc3)
190 .field("acc4", &acc4)
191 .finish()
192 }
193}
194
195#[derive(Debug, Clone, PartialEq)]
203pub struct Hasher {
204 seed: u32,
205 accumulators: Accumulators,
206 buffer: Buffer,
207 length: u64,
208}
209
210impl Default for Hasher {
211 fn default() -> Self {
212 Self::with_seed(0)
213 }
214}
215
216impl Hasher {
217 #[must_use]
220 #[inline]
224 pub fn oneshot(seed: u32, data: &[u8]) -> u32 {
225 let len = data.len();
226
227 let mut accumulators = Accumulators::new(seed);
232
233 let data = accumulators.write_many(data);
234
235 Self::finish_with(seed, len.into_u64(), &accumulators, data)
236 }
237
238 #[must_use]
240 pub const fn with_seed(seed: u32) -> Self {
241 Self {
243 seed,
244 accumulators: Accumulators::new(seed),
245 buffer: Buffer::new(),
246 length: 0,
247 }
248 }
249
250 pub const fn seed(&self) -> u32 {
252 self.seed
253 }
254
255 pub const fn total_len(&self) -> u64 {
257 self.length
258 }
259
260 pub const fn total_len_32(&self) -> u32 {
264 self.length as u32
265 }
266
267 #[must_use]
271 #[inline]
273 pub fn finish_32(&self) -> u32 {
274 Self::finish_with(
275 self.seed,
276 self.length,
277 &self.accumulators,
278 self.buffer.remaining(),
279 )
280 }
281
282 #[must_use]
283 #[inline]
285 fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 {
286 let mut acc = if len < BYTES_IN_LANE.into_u64() {
288 seed.wrapping_add(PRIME32_5)
289 } else {
290 accumulators.finish()
291 };
292
293 acc += len as u32;
299
300 while let Some((chunk, rest)) = remaining.split_first_chunk() {
302 let lane = u32::from_ne_bytes(*chunk).to_le();
303
304 acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3));
305 acc = acc.rotate_left(17).wrapping_mul(PRIME32_4);
306
307 remaining = rest;
308 }
309
310 for &byte in remaining {
311 let lane = byte.into_u32();
312
313 acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5));
314 acc = acc.rotate_left(11).wrapping_mul(PRIME32_1);
315 }
316
317 acc ^= acc >> 15;
319 acc = acc.wrapping_mul(PRIME32_2);
320 acc ^= acc >> 13;
321 acc = acc.wrapping_mul(PRIME32_3);
322 acc ^= acc >> 16;
323
324 acc
325 }
326}
327
328impl hash::Hasher for Hasher {
329 #[inline]
331 fn write(&mut self, data: &[u8]) {
332 let len = data.len();
333
334 let (buffered_lanes, data) = self.buffer.extend(data);
336
337 if let Some(&lanes) = buffered_lanes {
338 self.accumulators.write(lanes);
339 }
340
341 let data = self.accumulators.write_many(data);
342
343 self.buffer.set(data);
344
345 self.length += len.into_u64();
346 }
347
348 #[inline]
350 fn finish(&self) -> u64 {
351 Hasher::finish_32(self).into()
352 }
353}
354
355#[inline]
357const fn round(mut acc: u32, lane: u32) -> u32 {
358 acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2));
359 acc = acc.rotate_left(13);
360 acc.wrapping_mul(PRIME32_1)
361}
362
363#[derive(Clone)]
366pub struct State(u32);
367
368impl State {
369 pub fn with_seed(seed: u32) -> Self {
371 Self(seed)
372 }
373}
374
375impl BuildHasher for State {
376 type Hasher = Hasher;
377
378 fn build_hasher(&self) -> Self::Hasher {
379 Hasher::with_seed(self.0)
380 }
381}
382
383#[cfg(test)]
384mod test {
385 use core::{
386 array,
387 hash::{BuildHasherDefault, Hasher as _},
388 };
389 use std::collections::HashMap;
390
391 use super::*;
392
393 const _TRAITS: () = {
394 const fn is_clone<T: Clone>() {}
395 is_clone::<Hasher>();
396 is_clone::<State>();
397 };
398
399 const EMPTY_BYTES: [u8; 0] = [];
400
401 #[test]
402 fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
403 let bytes = [0; 32];
404
405 let mut byte_by_byte = Hasher::with_seed(0);
406 for byte in bytes.chunks(1) {
407 byte_by_byte.write(byte);
408 }
409 let byte_by_byte = byte_by_byte.finish();
410
411 let mut one_chunk = Hasher::with_seed(0);
412 one_chunk.write(&bytes);
413 let one_chunk = one_chunk.finish();
414
415 assert_eq!(byte_by_byte, one_chunk);
416 }
417
418 #[test]
419 fn hash_of_nothing_matches_c_implementation() {
420 let mut hasher = Hasher::with_seed(0);
421 hasher.write(&EMPTY_BYTES);
422 assert_eq!(hasher.finish(), 0x02cc_5d05);
423 }
424
425 #[test]
426 fn hash_of_single_byte_matches_c_implementation() {
427 let mut hasher = Hasher::with_seed(0);
428 hasher.write(&[42]);
429 assert_eq!(hasher.finish(), 0xe0fe_705f);
430 }
431
432 #[test]
433 fn hash_of_multiple_bytes_matches_c_implementation() {
434 let mut hasher = Hasher::with_seed(0);
435 hasher.write(b"Hello, world!\0");
436 assert_eq!(hasher.finish(), 0x9e5e_7e93);
437 }
438
439 #[test]
440 fn hash_of_multiple_chunks_matches_c_implementation() {
441 let bytes: [u8; 100] = array::from_fn(|i| i as u8);
442 let mut hasher = Hasher::with_seed(0);
443 hasher.write(&bytes);
444 assert_eq!(hasher.finish(), 0x7f89_ba44);
445 }
446
447 #[test]
448 fn hash_with_different_seed_matches_c_implementation() {
449 let mut hasher = Hasher::with_seed(0x42c9_1977);
450 hasher.write(&EMPTY_BYTES);
451 assert_eq!(hasher.finish(), 0xd6bf_8459);
452 }
453
454 #[test]
455 fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
456 let bytes: [u8; 100] = array::from_fn(|i| i as u8);
457 let mut hasher = Hasher::with_seed(0x42c9_1977);
458 hasher.write(&bytes);
459 assert_eq!(hasher.finish(), 0x6d2f_6c17);
460 }
461
462 #[test]
463 fn hashes_with_different_offsets_are_the_same() {
464 let bytes = [0x7c; 4096];
465 let expected = Hasher::oneshot(0, &[0x7c; 64]);
466
467 let the_same = bytes
468 .windows(64)
469 .map(|w| {
470 let mut hasher = Hasher::with_seed(0);
471 hasher.write(w);
472 hasher.finish_32()
473 })
474 .all(|h| h == expected);
475 assert!(the_same);
476 }
477
478 #[ignore]
483 #[test]
484 fn length_overflows_32bit() {
485 let bytes200: [u8; 200] = array::from_fn(|i| i as _);
487
488 let mut hasher = Hasher::with_seed(0);
489 for _ in 0..(4_300_000_000 / bytes200.len()) {
490 hasher.write(&bytes200);
491 }
492
493 assert_eq!(hasher.total_len(), 0x0000_0001_004c_cb00);
494 assert_eq!(hasher.total_len_32(), 0x004c_cb00);
495
496 assert_eq!(hasher.finish(), 0x1522_4ca7);
498 }
499
500 #[test]
501 fn can_be_used_in_a_hashmap_with_a_default_seed() {
502 let mut hash: HashMap<_, _, BuildHasherDefault<Hasher>> = Default::default();
503 hash.insert(42, "the answer");
504 assert_eq!(hash.get(&42), Some(&"the answer"));
505 }
506}
507
508#[cfg(feature = "random")]
509#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
510mod random_impl {
511 use super::*;
512
513 #[derive(Clone)]
516 pub struct RandomState(State);
517
518 impl Default for RandomState {
519 fn default() -> Self {
520 Self::new()
521 }
522 }
523
524 impl RandomState {
525 fn new() -> Self {
526 Self(State::with_seed(rand::random()))
527 }
528 }
529
530 impl BuildHasher for RandomState {
531 type Hasher = Hasher;
532
533 fn build_hasher(&self) -> Self::Hasher {
534 self.0.build_hasher()
535 }
536 }
537
538 #[cfg(test)]
539 mod test {
540 use std::collections::HashMap;
541
542 use super::*;
543
544 const _: () = {
545 const fn is_clone<T: Clone>() {}
546 is_clone::<Hasher>();
547 is_clone::<RandomState>();
548 };
549
550 #[test]
551 fn can_be_used_in_a_hashmap_with_a_random_seed() {
552 let mut hash: HashMap<_, _, RandomState> = Default::default();
553 hash.insert(42, "the answer");
554 assert_eq!(hash.get(&42), Some(&"the answer"));
555 }
556 }
557}
558
559#[cfg(feature = "random")]
560#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
561pub use random_impl::*;
562
563#[cfg(feature = "serialize")]
564#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
565mod serialize_impl {
566 use serde::{Deserialize, Serialize};
567
568 use super::*;
569
570 impl<'de> Deserialize<'de> for Hasher {
571 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
572 where
573 D: serde::Deserializer<'de>,
574 {
575 let shim = Deserialize::deserialize(deserializer)?;
576
577 let Shim {
578 total_len,
579 seed,
580 core,
581 buffer,
582 buffer_usage,
583 } = shim;
584 let Core { v1, v2, v3, v4 } = core;
585
586 let mut buffer_data = BufferData::new();
587 buffer_data.bytes_mut().copy_from_slice(&buffer);
588
589 Ok(Hasher {
590 seed,
591 accumulators: Accumulators([v1, v2, v3, v4]),
592 buffer: Buffer {
593 offset: buffer_usage,
594 data: buffer_data,
595 },
596 length: total_len,
597 })
598 }
599 }
600
601 impl Serialize for Hasher {
602 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
603 where
604 S: serde::Serializer,
605 {
606 let Hasher {
607 seed,
608 ref accumulators,
609 ref buffer,
610 length,
611 } = *self;
612 let [v1, v2, v3, v4] = accumulators.0;
613 let Buffer { offset, ref data } = *buffer;
614 let buffer = *data.bytes();
615
616 let shim = Shim {
617 total_len: length,
618 seed,
619 core: Core { v1, v2, v3, v4 },
620 buffer,
621 buffer_usage: offset,
622 };
623
624 shim.serialize(serializer)
625 }
626 }
627
628 #[derive(Serialize, Deserialize)]
629 struct Shim {
630 total_len: u64,
631 seed: u32,
632 core: Core,
633 buffer: [u8; 16],
634 buffer_usage: usize,
635 }
636
637 #[derive(Serialize, Deserialize)]
638 struct Core {
639 v1: u32,
640 v2: u32,
641 v3: u32,
642 v4: u32,
643 }
644
645 #[cfg(test)]
646 mod test {
647 use std::hash::Hasher as _;
648
649 use super::*;
650
651 type Result<T = (), E = serde_json::Error> = core::result::Result<T, E>;
652
653 #[test]
654 fn test_serialization_cycle() -> Result {
655 let mut hasher = Hasher::with_seed(0);
656 hasher.write(b"Hello, world!\0");
657 let _ = hasher.finish();
658
659 let serialized = serde_json::to_string(&hasher)?;
660 let unserialized: Hasher = serde_json::from_str(&serialized)?;
661 assert_eq!(hasher, unserialized);
662 Ok(())
663 }
664
665 #[test]
666 fn test_serialization_stability() -> Result {
667 let mut hasher = Hasher::with_seed(0);
668 hasher.write(b"Hello, world!\0");
669 let _ = hasher.finish();
670
671 let expected_serialized = r#"{
672 "total_len": 14,
673 "seed": 0,
674 "core": {
675 "v1": 606290984,
676 "v2": 2246822519,
677 "v3": 0,
678 "v4": 1640531535
679 },
680 "buffer": [
681 72, 101, 108, 108, 111, 44, 32, 119,
682 111, 114, 108, 100, 33, 0, 0, 0
683 ],
684 "buffer_usage": 14
685 }"#;
686
687 let unserialized: Hasher = serde_json::from_str(expected_serialized)?;
688 assert_eq!(hasher, unserialized);
689
690 let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?;
691 let actual_value = serde_json::to_value(&hasher)?;
692 assert_eq!(expected_value, actual_value);
693
694 Ok(())
695 }
696 }
697}