1#![feature(test)]
2extern crate test;
3
4use core::fmt::*;
15use core::hash::Hash;
16use core::ops::{Index, IndexMut};
17use fixedbitset::FixedBitSet;
18use fxhash::FxHasher64;
19use pi_null::Null;
20use pi_wy_rng::WyRng;
21use rand::{RngCore, SeedableRng};
22use std::{hash::Hasher, marker::PhantomData, mem};
23
24pub struct PhfMap<K: Hash, V: Null> {
25 lut: Vec<V>,
27 len: usize,
29 hasher: u64,
31 right_shift: u32,
33 offset: u32,
35 _k: PhantomData<K>,
36}
37
38impl<K: Hash, V: Null> PhfMap<K, V> {
40 pub fn new(vec: Vec<(K, V)>) -> Self {
42 Self::with_hasher(vec, 0)
43 }
44 pub fn with_hasher(vec: Vec<(K, V)>, mut hasher: u64) -> Self {
46 let len = vec.len();
49 if len == 0 {
50 return PhfMap {
51 lut: vec![],
52 len,
53 hasher,
54 right_shift: 0,
55 offset: 0,
56 _k: PhantomData,
57 }
58 }
59 let mut right_shift = (len as u64 * 3).leading_zeros();
60 let size = 1 << (u64::BITS - right_shift);
61 let mut conflicts = FixedBitSet::with_capacity(size);
63 let mut count = 0;
64 let mut r = WyRng::seed_from_u64(hasher);
65 loop {
66 if Self::test(&vec, hasher, right_shift, &mut conflicts) {
67 return Self::make(vec, hasher, right_shift, conflicts);
69 }
70 count += 1;
71 if count >= 1024 {
72 right_shift -= 1;
74 let size = 1 << (u64::BITS - right_shift);
75 conflicts = FixedBitSet::with_capacity(size);
76 hasher = r.next_u64();
77 count = 0;
78 continue;
79 }
80 conflicts.clear();
81 hasher = r.next_u64();
82 }
83 }
84 fn test(vec: &Vec<(K, V)>, hasher: u64, right_shift: u32, conflicts: &mut FixedBitSet) -> bool {
85 for (k, _v) in vec.iter() {
86 let i = Self::hash(k, hasher, right_shift);
87 if conflicts.contains(i) {
88 return false;
89 }
90 conflicts.set(i, true);
91 }
92 true
93 }
94 fn make(vec: Vec<(K, V)>, hasher: u64, right_shift: u32, conflicts: FixedBitSet) -> Self {
95 let mut offset = 0;
96 for i in 0..conflicts.len() {
97 if conflicts.contains(i) {
98 break;
99 }
100 offset += 1;
101 }
102 let mut end = conflicts.len();
103 for i in (0..conflicts.len()).rev() {
104 if conflicts.contains(i) {
105 break;
106 }
107 end -= 1;
108 }
109 let len = end - offset as usize;
110 let mut lut = Vec::with_capacity(len);
111 lut.extend((0..len).map(|_| V::null()));
112 for (k, v) in vec.into_iter() {
113 let i = Self::hash(&k, hasher, right_shift);
114 lut[i - offset as usize] = v;
115 }
116 PhfMap {
117 lut,
118 len,
119 hasher,
120 right_shift,
121 offset,
122 _k: PhantomData,
123 }
124 }
125 pub fn len(&self) -> usize {
127 self.len
128 }
129 pub fn capacity(&self) -> usize {
131 self.lut.len()
132 }
133 #[inline(always)]
135 pub fn get(&self, k: &K) -> Option<&V> {
136 let h = self.location(k);
137 self.lut.get(h)
138 }
139 #[inline(always)]
141 pub fn get_mut(&mut self, k: &K) -> Option<&mut V> {
142 let h = self.location(k);
143 self.lut.get_mut(h)
144 }
145 #[inline(always)]
147 pub unsafe fn get_unchecked(&self, k: &K) -> &V {
148 let h = self.location(k);
149 self.lut.get_unchecked(h)
150 }
151 #[inline(always)]
153 pub unsafe fn get_unchecked_mut(&mut self, k: &K) -> &mut V {
154 let h = self.location(k);
155 self.lut.get_unchecked_mut(h)
156 }
157 #[inline(always)]
158 pub fn val_iter(&self) -> Iter<'_, V> {
159 Iter {
160 lut: &self.lut,
161 idx: 0,
162 len: self.len,
163 }
164 }
165 #[inline(always)]
166 pub fn val_iter_mut(&mut self) -> IterMut<'_, V> {
167 IterMut {
168 lut: &mut self.lut,
169 idx: 0,
170 len: self.len,
171 }
172 }
173 #[inline(always)]
174 pub fn into_vec(self) -> Vec<V> {
175 self.lut
176 }
177 #[inline(always)]
179 fn location(&self, k: &K) -> usize {
180 Self::hash(k, self.hasher, self.right_shift).wrapping_sub(self.offset as usize)
181 }
182 #[inline(always)]
184 fn hash(k: &K, hasher: u64, right_shift: u32) -> usize {
185 let mut state: FxHasher64 = unsafe { mem::transmute(hasher) };
186 k.hash(&mut state);
187 (state.finish() >> right_shift) as usize
188 }
189}
190impl<K: Hash, V: Null> Index<K> for PhfMap<K, V> {
191 type Output = V;
192 fn index(&self, key: K) -> &V {
193 self.get(&key).unwrap()
194 }
195}
196impl<K: Hash, V: Null> IndexMut<K> for PhfMap<K, V> {
197 fn index_mut(&mut self, key: K) -> &mut V {
198 self.get_mut(&key).unwrap()
199 }
200}
201
202impl<K: Hash, V: Null + Clone> Clone for PhfMap<K, V> {
203 fn clone(&self) -> Self {
204 PhfMap {
205 lut: self.lut.clone(),
206 len: self.len,
207 hasher: self.hasher,
208 right_shift: self.right_shift,
209 offset: self.offset,
210 _k: PhantomData,
211 }
212 }
213}
214
215impl<K: Hash, V: Null + Debug> Debug for PhfMap<K, V> {
216 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
217 let vec: Vec<&V> = self.val_iter().collect();
218 f.debug_struct("PhfMap")
219 .field("lut", &vec.as_slice())
220 .field("len", &self.len)
221 .field("hasher", &self.hasher)
222 .field("right_shift", &self.right_shift)
223 .field("offset", &self.offset)
224 .finish()
225 }
226}
227
228pub struct Iter<'a, T: Null> {
229 lut: &'a Vec<T>,
230 idx: usize,
231 len: usize,
232}
233
234impl<'a, T: Null> Iterator for Iter<'a, T> {
235 type Item = &'a T;
236 fn next(&mut self) -> Option<Self::Item> {
237 while self.idx < self.lut.len() {
238 let v = unsafe { self.lut.get_unchecked(self.idx) };
239 self.idx += 1;
240 if !v.is_null() {
241 return Some(v);
242 }
243 }
244 None
245 }
246 fn size_hint(&self) -> (usize, Option<usize>) {
247 (self.len, Some(self.len))
248 }
249}
250impl<T: Null + Debug> Debug for Iter<'_, T> {
251 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
252 f.debug_tuple("Iter").field(&self.idx).finish()
253 }
254}
255
256pub struct IterMut<'a, V: Null> {
257 lut: &'a mut Vec<V>,
258 idx: usize,
259 len: usize,
260}
261
262impl<'a, V: Null> Iterator for IterMut<'a, V> {
263 type Item = &'a mut V;
264 fn next(&mut self) -> Option<Self::Item> {
265 let len = self.lut.len();
266 while self.idx < len {
267 let v = unsafe { self.lut.get_unchecked_mut(self.idx) as *mut V };
268 self.idx += 1;
269 if !v.is_null() {
270 return Some(unsafe { &mut *v });
271 }
272 }
273 None
274 }
275 fn size_hint(&self) -> (usize, Option<usize>) {
276 (self.len, Some(self.len))
277 }
278}
279#[cfg(test)]
280mod test_mod {
281 extern crate rand;
282
283 use crate::*;
284 use rand::Rng;
285 use test::Bencher;
286
287 #[test]
288 fn test() {
289 let mut rng = pi_wy_rng::WyRng::seed_from_u64(22222);
294 let mut arr = Vec::new();
295 for _ in 0..16 {
296 let k = rng.gen::<usize>();
297 arr.push((k, k + 1));
298 }
299 let map = PhfMap::with_hasher(arr.clone(), 0);
300 println!("map:{:?}", map);
301 for (k, v) in arr {
302 let n = map[k];
303 assert_eq!(n, v);
304 }
305 }
306
307 #[bench]
308 fn bench_make(b: &mut Bencher) {
309 use rand::Rng;
310
311 let mut rng = rand::thread_rng();
312 let mut arr = Vec::new();
313 for _ in 0..430 {
314 let k = rng.gen::<usize>();
315 arr.push((k, k + 1));
316 }
317 b.iter(move || {
318 let map = PhfMap::with_hasher(arr.clone(), 0);
319 for (k, v) in arr.iter() {
320 let n = map.get(&k).unwrap();
321 assert_eq!(n, v);
322 }
323 });
324 }
325 #[bench]
326 fn bench_test(b: &mut Bencher) {
327 use rand::Rng;
328 let mut rng = rand::thread_rng();
329 let mut arr = Vec::new();
330 for _ in 0..430 {
331 let k = rng.gen::<usize>();
332 arr.push((k, k + 1));
333 }
334 let map = PhfMap::with_hasher(arr.clone(), 0);
335 println!("map capacity:{}", map.capacity());
336 b.iter(move || {
337 for &(k, v) in &arr {
338 let n = map.get(&k).unwrap();
339 assert_eq!(*n, v);
340 }
341 });
342 }
343 #[bench]
344 fn bench_test_arr(b: &mut Bencher) {
345 use rand::Rng;
346 let mut rng = rand::thread_rng();
347 let mut arr = Vec::new();
348 for _ in 0..430 {
349 let k = rng.gen::<usize>();
350 arr.push((k, k + 1));
351 }
352 let mut suffle = arr.clone();
353 suffle.sort();
354 println!("arr len:{}", 1);
355 b.iter(move || {
356 for &(k, _v) in &arr {
357 let n = suffle.iter().position(|&x| x.0 == k);
358 assert!(n.is_some());
359 }
360 });
361 }
362 #[bench]
363 fn bench_test_map(b: &mut Bencher) {
364 use rand::Rng;
365 use std::collections::HashMap;
366 use std::hash::BuildHasherDefault;
367
368 pub type XHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
369 let mut rng = rand::thread_rng();
370 let mut arr = Vec::new();
371 let mut map: XHashMap<usize, usize> = XHashMap::with_hasher(Default::default());
372 for _ in 0..430 {
373 let k = rng.gen::<usize>();
374 arr.push((k, k + 1));
375 map.insert(k, k + 1);
376 }
377 b.iter(move || {
378 for &(k, v) in &arr {
379 let n = map.get(&k);
380 assert_eq!(*n.unwrap(), v);
381 }
382 });
383 }
384}