rkyv_test/collections/hash_index/
mod.rs

1//! A helper type that archives index data for hashed collections using
2//! [compress, hash and displace](http://cmph.sourceforge.net/papers/esa09.pdf).
3
4use crate::{Archive, Archived, RelPtr};
5use core::{
6    fmt,
7    hash::{Hash, Hasher},
8    slice,
9};
10
11/// The hash builder for archived hash indexes.
12pub use seahash::SeaHasher as HashBuilder;
13
14#[cfg(feature = "validation")]
15pub mod validation;
16
17/// An archived hash index.
18#[cfg_attr(feature = "strict", repr(C))]
19pub struct ArchivedHashIndex {
20    len: Archived<usize>,
21    displace: RelPtr<Archived<u32>>,
22}
23
24impl ArchivedHashIndex {
25    /// Gets the number of items in the hash index.
26    #[inline]
27    pub const fn len(&self) -> usize {
28        from_archived!(self.len) as usize
29    }
30
31    #[inline]
32    fn make_hasher() -> HashBuilder {
33        HashBuilder::with_seeds(
34            0x08576fb6170b5f5f,
35            0x587775eeb84a7e46,
36            0xac701115428ee569,
37            0x910feb91b92bb1cd,
38        )
39    }
40
41    /// Gets the hasher for this hash index. The hasher for all archived hash indexes is the same
42    /// for reproducibility.
43    #[inline]
44    pub fn hasher(&self) -> HashBuilder {
45        Self::make_hasher()
46    }
47
48    #[inline]
49    fn displace_slice(&self) -> &[Archived<u32>] {
50        unsafe { slice::from_raw_parts(self.displace.as_ptr(), self.len()) }
51    }
52
53    #[inline]
54    fn displace(&self, index: usize) -> u32 {
55        from_archived!(self.displace_slice()[index])
56    }
57
58    /// Returns the index where a key may be located in the hash index.
59    ///
60    /// The hash index does not have access to the keys used to build it, so the key at the returned
61    /// index must be checked for equality.
62    #[inline]
63    pub fn index<K: Hash + ?Sized>(&self, k: &K) -> Option<usize> {
64        let mut hasher = self.hasher();
65        k.hash(&mut hasher);
66        let displace_index = hasher.finish() % self.len() as u64;
67        let displace = self.displace(displace_index as usize);
68
69        if displace == u32::MAX {
70            None
71        } else if displace & 0x80_00_00_00 == 0 {
72            Some(displace as usize)
73        } else {
74            let mut hasher = self.hasher();
75            displace.hash(&mut hasher);
76            k.hash(&mut hasher);
77            let index = hasher.finish() % self.len() as u64;
78            Some(index as usize)
79        }
80    }
81
82    /// Returns whether there are no items in the hash index.
83    #[inline]
84    pub const fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Resolves an archived hash index from a given length and parameters.
89    ///
90    /// # Safety
91    ///
92    /// - `len` must be the number of elements in the hash index
93    /// - `pos` must be the position of `out` within the archive
94    /// - `resolver` must be the result of building and serializing a hash index
95    #[inline]
96    pub unsafe fn resolve_from_len(
97        len: usize,
98        pos: usize,
99        resolver: HashIndexResolver,
100        out: *mut Self,
101    ) {
102        let (fp, fo) = out_field!(out.len);
103        len.resolve(pos + fp, (), fo);
104
105        let (fp, fo) = out_field!(out.displace);
106        RelPtr::emplace(pos + fp, resolver.displace_pos, fo);
107    }
108}
109
110#[cfg(feature = "alloc")]
111const _: () = {
112    use crate::{
113        ser::{ScratchSpace, Serializer},
114        ScratchVec,
115    };
116    #[cfg(not(feature = "std"))]
117    use alloc::vec::Vec;
118    use core::{
119        cmp::Reverse,
120        mem::{size_of, MaybeUninit},
121    };
122
123    impl ArchivedHashIndex {
124        /// Builds and serializes a hash index from an iterator of key-value pairs.
125        ///
126        /// # Safety
127        ///
128        /// - The keys returned by the iterator must be unique.
129        /// - `entries` must have a capacity of `iter.len()` entries.
130        #[allow(clippy::type_complexity)]
131        pub unsafe fn build_and_serialize<'a, K, V, S, I>(
132            iter: I,
133            serializer: &mut S,
134            entries: &mut ScratchVec<MaybeUninit<(&'a K, &'a V)>>,
135        ) -> Result<HashIndexResolver, S::Error>
136        where
137            K: 'a + Hash,
138            V: 'a,
139            S: Serializer + ScratchSpace + ?Sized,
140            I: ExactSizeIterator<Item = (&'a K, &'a V)>,
141        {
142            let len = iter.len();
143
144            let mut bucket_size = ScratchVec::new(serializer, len)?;
145            for _ in 0..len {
146                bucket_size.push(0u32);
147            }
148
149            let mut displaces = ScratchVec::new(serializer, len)?;
150
151            for (key, value) in iter {
152                let mut hasher = Self::make_hasher();
153                key.hash(&mut hasher);
154                let displace = (hasher.finish() % len as u64) as u32;
155                displaces.push((displace, (key, value)));
156                bucket_size[displace as usize] += 1;
157            }
158
159            displaces
160                .sort_by_key(|&(displace, _)| (Reverse(bucket_size[displace as usize]), displace));
161
162            let mut occupied = ScratchVec::new(serializer, len)?;
163            for _ in 0..len {
164                occupied.push(false);
165            }
166
167            let mut displacements = ScratchVec::new(serializer, len)?;
168            for _ in 0..len {
169                displacements.push(to_archived!(u32::MAX));
170            }
171
172            let mut first_empty = 0;
173            let mut assignments = Vec::with_capacity(8);
174
175            let mut start = 0;
176            while start < displaces.len() {
177                let displace = displaces[start].0;
178                let bucket_size = bucket_size[displace as usize] as usize;
179                let end = start + bucket_size;
180                let bucket = &displaces[start..end];
181                start = end;
182
183                if bucket_size > 1 {
184                    'find_seed: for seed in 0x80_00_00_00u32..=0xFF_FF_FF_FFu32 {
185                        let mut base_hasher = Self::make_hasher();
186                        seed.hash(&mut base_hasher);
187
188                        assignments.clear();
189
190                        for &(_, (key, _)) in bucket.iter() {
191                            let mut hasher = base_hasher;
192                            key.hash(&mut hasher);
193                            let index = (hasher.finish() % len as u64) as u32;
194                            if occupied[index as usize] || assignments.contains(&index) {
195                                continue 'find_seed;
196                            } else {
197                                assignments.push(index);
198                            }
199                        }
200
201                        for i in 0..bucket_size {
202                            occupied[assignments[i] as usize] = true;
203                            entries[assignments[i] as usize]
204                                .as_mut_ptr()
205                                .write(bucket[i].1);
206                        }
207                        displacements[displace as usize] = to_archived!(seed);
208                        break;
209                    }
210                } else {
211                    let offset = occupied[first_empty..]
212                        .iter()
213                        .position(|value| !value)
214                        .unwrap();
215                    first_empty += offset;
216                    occupied[first_empty] = true;
217                    entries[first_empty].as_mut_ptr().write(bucket[0].1);
218                    displacements[displace as usize] = to_archived!(first_empty as u32);
219                    first_empty += 1;
220                }
221            }
222
223            // Write displacements
224            let displace_pos = serializer.align_for::<Archived<u32>>()?;
225            let displacements_slice = slice::from_raw_parts(
226                displacements.as_ptr().cast::<u8>(),
227                len * size_of::<Archived<u32>>(),
228            );
229            serializer.write(displacements_slice)?;
230
231            // Free scratch vecs
232            displacements.free(serializer)?;
233            occupied.free(serializer)?;
234            displaces.free(serializer)?;
235            bucket_size.free(serializer)?;
236
237            Ok(HashIndexResolver { displace_pos })
238        }
239    }
240};
241
242impl fmt::Debug for ArchivedHashIndex {
243    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
244        f.debug_list().entries(self.displace_slice()).finish()
245    }
246}
247
248/// The resolver for an archived hash index.
249pub struct HashIndexResolver {
250    displace_pos: usize,
251}