1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#![doc(html_root_url="http://sfackler.github.io/rust-phf/doc")]
extern crate phf_shared;
extern crate rand;
use phf_shared::PhfHash;
use rand::{SeedableRng, XorShiftRng, Rng};
const DEFAULT_LAMBDA: usize = 5;
const FIXED_SEED: [u32; 4] = [3141592653, 589793238, 462643383, 2795028841];
pub struct HashState {
pub key: u64,
pub disps: Vec<(u32, u32)>,
pub map: Vec<usize>,
}
pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
let mut rng: XorShiftRng = SeedableRng::from_seed(FIXED_SEED);
loop {
if let Some(s) = try_generate_hash(entries, &mut rng) {
return s;
}
}
}
fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut XorShiftRng) -> Option<HashState> {
struct Bucket {
idx: usize,
keys: Vec<usize>,
}
struct Hashes {
g: u32,
f1: u32,
f2: u32,
}
let key = rng.gen();
let hashes: Vec<_> = entries.iter().map(|entry| {
let (g, f1, f2) = entry.phf_hash(key);
Hashes {
g: g,
f1: f1,
f2: f2
}
}).collect();
let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
let mut buckets = (0..buckets_len)
.map(|i| Bucket { idx: i, keys: vec![] })
.collect::<Vec<_>>();
for (i, hash) in hashes.iter().enumerate() {
buckets[(hash.g % (buckets_len as u32)) as usize].keys.push(i);
}
buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());
let table_len = entries.len();
let mut map = vec![None; table_len];
let mut disps = vec![(0u32, 0u32); buckets_len];
let mut try_map = vec![0u64; table_len];
let mut generation = 0u64;
let mut values_to_add = vec![];
'buckets: for bucket in buckets.iter() {
for d1 in 0..(table_len as u32) {
'disps: for d2 in 0..(table_len as u32) {
values_to_add.clear();
generation += 1;
for &key in bucket.keys.iter() {
let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2)
% (table_len as u32)) as usize;
if map[idx].is_some() || try_map[idx] == generation {
continue 'disps;
}
try_map[idx] = generation;
values_to_add.push((idx, key));
}
disps[bucket.idx] = (d1, d2);
for &(idx, key) in values_to_add.iter() {
map[idx] = Some(key);
}
continue 'buckets;
}
}
return None;
}
Some(HashState {
key: key,
disps: disps,
map: map.into_iter().map(|i| i.unwrap()).collect(),
})
}