1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
use crate::prelude::HashSet;
impl HashSet {
pub const fn xor_from(key: u64, block_length: usize, seed: u64) -> Self {
let hash = crate::prelude::mix(key, seed);
Self {
hash,
hset: [
crate::xor_h!(index block 0, of length block_length, using hash),
crate::xor_h!(index block 1, of length block_length, using hash),
crate::xor_h!(index block 2, of length block_length, using hash),
],
}
}
}
/// Computes a hash indexing the i'th filter block.
#[doc(hidden)]
#[macro_export]
macro_rules! xor_h(
(index block $i:expr, of length $block_length:expr, using $hash:expr) => {
{
let rot = $crate::rotl64!($hash, by (($i as isize) * 21)) as u32; // shift hash to correct block interval
$crate::reduce!(rot on interval $block_length)
}
};
);
/// Creates a `contains(u64)` implementation for an xor filter of fingerprint type `$fpty`.
#[doc(hidden)]
#[macro_export]
macro_rules! xor_contains_impl(
($key:expr, $self:expr, fingerprint $fpty:ty) => {
{
use $crate::prelude::HashSet;
let HashSet {
hash,
hset: [h0, h1, h2],
} = HashSet::xor_from($key, $self.block_length, $self.seed);
let fp = $crate::fingerprint!(hash) as $fpty;
fp == $self.fingerprints[h0]
^ $self.fingerprints[(h1 + $self.block_length)]
^ $self.fingerprints[(h2 + 2 * $self.block_length)]
}
};
);
/// Creates an `from(&[u64])` implementation for an xor filter of fingerprint type `$fpty`.
#[doc(hidden)]
#[macro_export]
macro_rules! xor_from_impl(
($keys:ident fingerprint $fpty:ty) => {
{
use $crate::{
fingerprint,
xor_h,
make_block,
prelude::{HashSet, HSet, KeyIndex},
splitmix64::splitmix64,
try_enqueue,
};
#[cfg(debug_assertions)] {
use $crate::prelude::all_distinct;
debug_assert!(all_distinct($keys.clone()), "Xor filters must be constructed from a collection containing all distinct keys.");
}
// See Algorithm 3 in the paper.
let num_keys = $keys.len();
let capacity = (1.23 * num_keys as f64) as usize + 32;
let capacity = capacity / 3 * 3; // round to nearest multiple of 3
let block_length = capacity / 3;
#[allow(non_snake_case)]
let mut H: [Box<[HSet]>; 3] = [
make_block!(with capacity sets),
make_block!(with capacity sets),
make_block!(with capacity sets),
];
#[allow(non_snake_case)]
let mut Q: [Box<[KeyIndex]>; 3] = [
make_block!(with capacity sets),
make_block!(with capacity sets),
make_block!(with capacity sets),
];
let mut stack: Box<[KeyIndex]> = make_block!(with num_keys sets);
let mut rng = 1;
let mut seed = splitmix64(&mut rng);
loop {
// Populate H by adding each key to its respective set.
for key in $keys.clone() {
let HashSet { hash, hset } = HashSet::xor_from(key, block_length, seed);
for b in 0..3 {
let setindex = hset[b];
H[b][setindex].mask ^= hash;
H[b][setindex].count += 1;
}
}
// Scan for sets with a single key. Add these keys to the queue.
let mut q_sizes: [usize; 3] = [0, 0, 0];
for b in 0..3 {
for idx in 0..(block_length) {
try_enqueue!(block H[b], set idx;
queue block Q[b], with size q_sizes[b]);
}
}
let mut stack_size = 0;
while q_sizes.iter().sum::<usize>() > 0 {
macro_rules! dequeue(
(block $block:expr, other blocks being $a:expr, $b:expr) => {
while q_sizes[$block] > 0 {
// Remove an element from the queue.
q_sizes[$block] -= 1;
let mut ki = Q[$block][q_sizes[$block]];
if H[$block][ki.index].count == 0 {
continue;
}
// If it's the only element in its respective set in H, add it to
// the output stack.
ki.index += $block * block_length;
stack[stack_size] = ki;
stack_size += 1;
// Remove the element from every other set and enqueue any sets
// that now only have one element.
for j in &[$a, $b] {
let idx = xor_h!(index block *j, of length block_length, using ki.hash);
H[*j][idx].mask ^= ki.hash;
H[*j][idx].count -= 1;
try_enqueue!(block H[*j], set idx;
queue block Q[*j], with size q_sizes[*j]);
}
}
};
);
dequeue!(block 0, other blocks being 1, 2);
dequeue!(block 1, other blocks being 0, 2);
dequeue!(block 2, other blocks being 0, 1);
}
if stack_size == num_keys {
break;
}
// Filter failed to be created; reset and try again.
for block in H.iter_mut() {
for set in block.iter_mut() {
*set = HSet::default();
}
}
seed = splitmix64(&mut rng)
}
// Construct all fingerprints (see Algorithm 4 in the paper).
#[allow(non_snake_case)]
let mut B: Box<[$fpty]> = make_block!(with capacity sets);
for ki in stack.iter().rev() {
B[ki.index] = fingerprint!(ki.hash) as $fpty
^ B[xor_h!(index block 0, of length block_length, using ki.hash)]
^ B[(xor_h!(index block 1, of length block_length, using ki.hash) + block_length)]
^ B[(xor_h!(index block 2, of length block_length, using ki.hash) + 2 * block_length)];
}
Self {
seed,
block_length,
fingerprints: B,
}
}
};
);