1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
//! Metrics data structure module
use num::traits::Pow;
use rayon::prelude::*;
#[cfg(feature = "serde_support")]
use serde::{Deserialize, Serialize};
/// The data structure that contains the metrics about the current `SBF` structure.
///
/// This data structure is automatically added to each `SBF` if the feature `metrics` is enabled.
/// It's not necessary and is disabled by default.
#[cfg(feature = "metrics")]
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
pub struct Metrics {
/// Number of cells in the filter, the size of the filter
pub(crate) cells: usize,
/// Number of hash functions
pub(crate) hash_number: usize,
/// Number of inserted values
pub(crate) members: usize,
/// Number of collisions occurred
pub(crate) collisions: usize,
/// Safeness probability over the entire filter
pub(crate) safeness: f64,
/// Number of areas
pub(crate) area_number: usize,
/// Number of members per area
pub(crate) area_members: Vec<usize>,
/// Expected number of cells for each area
pub(crate) area_expected_cells: Vec<i64>,
/// Number of cells occupied by each area
pub(crate) area_cells: Vec<usize>,
/// Number of collision of the same area value on the same cell
pub(crate) area_self_collisions: Vec<usize>,
/// Prior false positive probability for each area
pub(crate) area_prior_fpp: Vec<f64>,
/// Posterior false positive probability for each area
pub(crate) area_fpp: Vec<f64>,
/// Prior inter-set error probability for each area
pub(crate) area_prior_isep: Vec<f64>,
/// Posterior inter-set error probability for each area
pub(crate) area_isep: Vec<f64>,
/// Prior area-specific safeness probability
pub(crate) area_prior_safep: Vec<f64>,
}
#[cfg(feature = "metrics")]
impl Metrics {
/// Returns the number of inserted elements for the input area
pub fn get_area_members(&self, index: usize) -> Option<usize> {
self.area_members.get(index).cloned()
}
/// Returns the sparsity of the entire SBF
pub fn get_filter_sparsity(&self) -> f64 {
let sum: usize = self.area_cells
.par_iter()
.skip(1) // Skip the index 0
.cloned()
.sum();
1.0 - (sum as f64 / self.hash_number as f64)
}
/// Returns the posterior false positive probability over the entire filter
/// (i.e. not area-specific)
pub fn get_filter_fpp(&self) -> f64 {
let non_zero_cells: usize = self.area_cells.par_iter().cloned().sum();
let p = non_zero_cells as f64 / self.cells as f64;
p.pow(self.hash_number as f64)
}
/// Returns the expected emersion value for the input area
pub fn get_expected_area_emersion(&self, area: usize) -> f64 {
let cells_with_greater_area_index: usize = self.area_members
.par_iter()
.skip(area)
.skip(1)
.sum();
let p = 1.0f64 - 1.0f64 / self.cells as f64;
p.pow(self.hash_number as f64 * cells_with_greater_area_index as f64)
}
/// Returns the emersion value for the input area
pub fn get_area_emersion(&self, area: usize) -> Option<f64> {
if self.area_cells[area] == 0 || self.hash_number == 0 {
None
} else {
match (
self.area_cells.get(area),
self.area_members.get(area),
self.area_self_collisions.get(area)) {
(
Some(&area_cells),
Some(&area_members),
Some(&area_self_collisions)
) => {
let a = area_cells as f64;
let b = area_members as f64 * self.hash_number as f64 -
area_self_collisions as f64;
Some(a / b)
}
_ => None
}
}
}
/// Returns the prior false positive probability over the entire filter
pub fn get_filter_prior_fpp(&self) -> f64 {
let p = 1.0 - 1.0 / self.cells as f64;
let p = 1.0 - p.powf(self.hash_number as f64 * self.members as f64);
p.powf(self.hash_number as f64)
}
/// Computes posterior area-specific false positives probability (fpp)
#[allow(clippy::integer_arithmetic)]
pub fn set_area_fpp(&mut self) {
println!("AREA NUMBER: {}", self.area_number);
(1..self.area_number)
.rev()
.for_each(|i| {
let c: usize = (i..self.area_number)
.map(|j| self.area_cells[j])
.sum();
let p = c as f64 / self.cells as f64;
let p = p.powi(self.hash_number as i32);
self.area_fpp[i] = p;
(i..self.area_number - 1)
.for_each(|j| {
self.area_fpp[i] -= self.area_fpp[j + 1];
});
self.area_fpp[i] = self.area_fpp[i].max(0.0);
})
}
/// Computes prior area-specific false positives probability (prior_fpp)
#[allow(clippy::integer_arithmetic)]
pub fn set_prior_area_fpp(&mut self) {
(1..self.area_number)
.rev()
.for_each(|i| {
let c: usize = (i..self.area_number)
.map(|j| self.area_members[j])
.sum();
let p = 1.0 - 1.0 / self.cells as f64;
let p = 1.0 - p.powi((self.hash_number * c) as i32);
let p = p.powi(self.hash_number as i32);
self.area_fpp[i] = p;
(i..self.area_number - 1)
.for_each(|j| {
self.area_prior_fpp[i] -= self.area_prior_fpp[j + 1];
});
self.area_prior_fpp[i] = self.area_prior_fpp[i].max(0.0);
})
}
/// Computes posterior area-specific inter-set error probability (isep)
pub fn set_area_isep(&mut self) {
(1..self.area_number)
.rev()
.for_each(|i| {
let p = 1.0 - self.get_area_emersion(i).unwrap_or(-1.0);
let p = p.powi(self.hash_number as i32);
self.area_isep[i] = p;
})
}
/// Computes prior area-specific inter-set error probability (prior_isep),
/// computes prior area-specific safeness probability (prior_safep) and
/// the overall safeness probability for the entire filter (safeness)
pub fn set_prior_area_isep(&mut self) {
let mut p3 = 1.0;
(1..self.area_number)
.rev()
.for_each(|i| {
let n_fill: usize = (i..self.area_number)
.skip(1) // first element
.map(|j| self.area_members[j])
.sum();
let p1 = 1.0 - 1.0 / self.cells as f64;
let p1 = 1.0 - p1.powi((self.hash_number * n_fill) as i32);
let p1 = p1.powi(self.area_members[i] as i32);
let p2 = (1.0 - p1).powi(self.area_members[i] as i32);
p3 *= p2;
self.area_prior_isep[i] = p1;
self.area_prior_safep[i] = p2;
});
self.safeness = p3;
}
/// Computes the expected number of cells for each area (expected_cells)
#[allow(clippy::integer_arithmetic)]
pub fn set_expected_area_cells(&mut self) {
(1..self.area_number)
.rev()
.for_each(|i| {
let n_fill: usize = (i..self.area_number)
.map(|j| self.area_members[j])
.sum();
let p1 = 1.0 - 1.0 / self.cells as f64;
let p2 = p1.pow((self.hash_number * n_fill) as f64);
self.area_expected_cells[i] = (self.cells as f64 * p1 * p2) as i64;
})
}
}