musli_zerocopy/phf/
generator.rs

1use core::hash::Hash;
2
3use alloc::vec;
4use alloc::vec::Vec;
5
6use crate::buf::{Buf, Visit};
7use crate::error::{Error, ErrorKind};
8use crate::phf::Entry;
9use crate::phf::hashing::{HashKey, Hashes, displace, hash};
10use crate::{ByteOrder, Ref, Size, ZeroCopy};
11
12use rand::distr::StandardUniform;
13use rand::rngs::SmallRng;
14use rand::{Rng, SeedableRng};
15
16const DEFAULT_LAMBDA: usize = 5;
17const FIXED_SEED: u64 = 1234567890;
18
19pub(crate) struct HashState {
20    pub(crate) key: HashKey,
21}
22
23/// Calculate displacements length.
24#[inline]
25pub(crate) fn displacements_len(len: usize) -> usize {
26    len.div_ceil(DEFAULT_LAMBDA)
27}
28
29pub(crate) fn generate_hash<K, T, F, E, O>(
30    buf: &mut Buf,
31    entries: &Ref<[T], E, O>,
32    displacements: &Ref<[Entry<u32, u32>], E, O>,
33    map: &Ref<[usize], E, O>,
34    access: F,
35) -> Result<HashState, Error>
36where
37    K: Visit,
38    K::Target: Hash,
39    F: Fn(&T) -> &K,
40    T: ZeroCopy,
41    E: ByteOrder,
42    O: Size,
43{
44    for key in SmallRng::seed_from_u64(FIXED_SEED).sample_iter(StandardUniform) {
45        if let Some(hash) = try_generate_hash(buf, entries, displacements, map, key, &access)? {
46            return Ok(hash);
47        }
48
49        // Reset the state of displacements and maps since we're trying again.
50        for d in buf.load_mut(*displacements)? {
51            *d = Entry::new(0, 0);
52        }
53
54        for m in buf.load_mut(*map)? {
55            *m = usize::MAX;
56        }
57    }
58
59    Err(Error::new(ErrorKind::FailedPhf))
60}
61
62fn try_generate_hash<K, T, F, E, O>(
63    buf: &mut Buf,
64    entries: &Ref<[T], E, O>,
65    displacements: &Ref<[Entry<u32, u32>], E, O>,
66    map: &Ref<[usize], E, O>,
67    key: HashKey,
68    access: &F,
69) -> Result<Option<HashState>, Error>
70where
71    K: Visit,
72    K::Target: Hash,
73    F: ?Sized + Fn(&T) -> &K,
74    T: ZeroCopy,
75    E: ByteOrder,
76    O: Size,
77{
78    let mut hashes = Vec::new();
79
80    for entry in entries.iter() {
81        let entry = buf.load(entry)?;
82        let entry_key = access(entry);
83        let h = hash(buf, entry_key, &key)?;
84        hashes.push(h);
85    }
86
87    let mut buckets = vec![Vec::<usize>::new(); displacements.len()];
88
89    for (index, hash) in hashes.iter().enumerate() {
90        let to = hash.g % buckets.len();
91        buckets[to].push(index);
92    }
93
94    buckets.sort_by_key(|a| a.len());
95
96    let table_len = hashes.len();
97    // let mut map = vec![usize::MAX; table_len];
98
99    // store whether an element from the bucket being placed is
100    // located at a certain position, to allow for efficient overlap
101    // checks. It works by storing the generation in each cell and
102    // each new placement-attempt is a new generation, so you can tell
103    // if this is legitimately full by checking that the generations
104    // are equal. (A u64 is far too large to overflow in a reasonable
105    // time for current hardware.)
106    let mut try_map = vec![0u64; table_len];
107    let mut generation = 0u64;
108
109    // the actual values corresponding to the markers above, as
110    // (index, key) pairs, for adding to the main map once we've
111    // chosen the right displacements.
112    let mut values_to_add = vec![];
113
114    'outer: for (bucket, d_ref) in buckets.iter().zip(displacements.iter()) {
115        for d1 in 0..(table_len as u32) {
116            'inner: for d2 in 0..(table_len as u32) {
117                values_to_add.clear();
118                generation += 1;
119
120                for &key in bucket {
121                    let Hashes { f1, f2, .. } = hashes[key];
122                    let index = displace(f1, f2, d1, d2) as usize;
123                    let index = index % table_len;
124
125                    if *buf.load(map.at(index))? != usize::MAX || try_map[index] == generation {
126                        continue 'inner;
127                    }
128
129                    try_map[index] = generation;
130                    values_to_add.push((index, key));
131                }
132
133                // We've picked a good set of displacements
134                *buf.load_mut(d_ref)? = Entry::new(d1, d2);
135
136                for &(i, key) in &values_to_add {
137                    let m = buf.load_mut(map.at(i))?;
138                    *m = key;
139                }
140
141                continue 'outer;
142            }
143        }
144
145        // Unable to find displacements for a bucket
146        return Ok(None);
147    }
148
149    Ok(Some(HashState { key }))
150}