1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
// TODO: run test w/ FilterSize3 distribution, try xor with other

/// FilterSize bounds the allocated size and false-positive rate of a
/// [`Bloom2`](crate::Bloom2) instance.
///
/// The false positive probability for a bloom filter increases as the number of
/// entries increases. This relationship is demonstrated using 64bit hashes as
/// keys for each possible filter configuration below - you should choose a
/// filter size for your expected load level and hash size.
///
/// The value of FilterSize controls the `k` property of the filter: `k =
/// input_length_bytes / FilterSize`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum FilterSize {
	/// 1 byte / 8 bits per key results in a bloom filter with a minimum memory
	/// usage of ~4 bytes and a maximum memory usage of 36 bytes.
	///
	/// The false positive probability using `k=1` (a single byte key per entry)
	/// grows proportionally to the number of entries in the filter:
	///
	/// ```text
	///           +--+------------+------------+-----------+------------+------------+-----+
	///         1 +                                                *                   *   +
	///           |                                                                        |
	///           |                                   *                                    |
	///     P 0.8 +                                                                        +
	///     r     |                                                                        |
	///     o     |                                                                        |
	///     b 0.6 +                         *                                              +
	///     a     |                                                                        |
	///     b     |                                                                        |
	///     i 0.4 +                                                                        +
	///     l     |                  *                                                     |
	///     i     |                                                                        |
	///     t 0.2 +                                                                        +
	///     y     |                                                                        |
	///           |              *                                                         |
	///         0 +  ***** * *                                                             +
	///           +--+------------+------------+-----------+------------+------------+-----+
	///              0           50           100         150          200          250     
	///                                       Number of Entries                             
	/// ```
	///
	/// The probability of false positives reaches 1-in-2 after 80 entries.
	///
	/// An empty sparse bloom filter would require 1x64 bit block map entries (8
	/// bytes) to map 4 64 bit blocks, containing a total of 256 bits (memory
	/// saving: 75%)
	///
	KeyBytes1 = 1,

	/// 2 bytes / 16 bits per key results in a bloom filter with a minimum
	/// memory usage of ~1024 bytes and a maximum memory usage of ~8KB when
	/// fully populated.
	///
	/// When using a 64bit hash (4x2 byte keys, `k=4`) the probability of a
	/// false positive is:
	///
	/// ```text
	///           +--+------------------------+-----------------------+--------------------+
	///         1 +                                                *                  *    +
	///           |                                  *                                     |
	///           |                                                                        |
	///     P 0.8 +                         *                                              +
	///     r     |                                                                        |
	///     o     |                                                                        |
	///     b 0.6 +                                                                        +
	///     a     |                  *                                                     |
	///     b     |                                                                        |
	///     i 0.4 +                                                                        +
	///     l     |              *                                                         |
	///     i     |                                                                        |
	///     t 0.2 +                                                                        +
	///     y     |          *                                                             |
	///           |        *                                                               |
	///         0 +  *****                                                                 +
	///           +--+------------------------+-----------------------+--------------------+
	///              0                      50000                   1e+05                   
	///                                       Number of Entries                             
	/// ```
	///
	/// The probability of false positives reaches 1-in-2 after 30118 entries.
	///
	/// An empty sparse bloom filter would require 16x64 bit block map entries
	/// (128 bytes) to map 1024 64 bit blocks, containing a total of 65536 bits
	/// (memory saving: 98.4375%)
	///
	KeyBytes2 = 2,

	/// 3 bytes / 24 bits per key results in a bloom filter with a minimum
	/// memory usage of ~262KB bytes and a maximum memory usage of ~2MB when
	/// fully populated.
	///
	/// When using a 64bit hash (2x3 byte keys, `k=2`) the probability of a
	/// false positive is:
	///
	/// ```text
	///         1 +--+------------------+-------------------+------------------+-----------+
	///           |                                                                   *    |
	///           |                                                *                       |
	///       0.8 +                                                                        +
	///     P     |                                  *                                     |
	///     r     |                                                                        |
	///     o     |                                                                        |
	///     b 0.6 +                         *                                              +
	///     a     |                                                                        |
	///     b     |                                                                        |
	///     i 0.4 +                  *                                                     +
	///     l     |                                                                        |
	///     i     |              *                                                         |
	///     t 0.2 +                                                                        +
	///     y     |          *                                                             |
	///           |      * *                                                               |
	///         0 +  ****                                                                  +
	///           +--+------------------+-------------------+------------------+-----------+
	///              0                1e+07               2e+07              3e+07          
	///                                       Number of Entries                             
	/// ```
	///
	/// The probability of false positives reaches 1-in-2 after 10300768
	/// entries.
	///
	/// An empty sparse bloom filter would require 4096x64 bit block map entries
	/// (32768 bytes) to map 262144 64 bit blocks, containing a total of
	/// 16777216 bits (memory saving: 98.4375%)
	///
	KeyBytes3 = 3,

	/// 4 bytes / 32 bits per key results in a bloom filter with a minimum
	/// memory usage of ~67MB and a maximum memory usage of ~603MB when fully
	/// populated.
	///
	/// When using a 64bit hash (2x4 byte keys, `k=2`) the probability of a
	/// false positive is:
	///
	/// ```text
	///         1 +--+--------------+--------------+--------------+--------------+---------+
	///           |                                                                   *    |
	///           |                                                *                       |
	///       0.8 +                                                                        +
	///     P     |                                  *                                     |
	///     r     |                                                                        |
	///     o     |                                                                        |
	///     b 0.6 +                         *                                              +
	///     a     |                                                                        |
	///     b     |                                                                        |
	///     i 0.4 +                  *                                                     +
	///     l     |                                                                        |
	///     i     |              *                                                         |
	///     t 0.2 +                                                                        +
	///     y     |          *                                                             |
	///           |      * *                                                               |
	///         0 +  ****                                                                  +
	///           +--+--------------+--------------+--------------+--------------+---------+
	///              0            2e+09          4e+09          6e+09          8e+09        
	///                                       Number of Entries                             
	/// ```
	///
	/// The probability of false positives reaches 1-in-2 after 2636996484
	/// entries.
	///
	/// An empty sparse bloom filter would require 1048576x64 bit block map
	/// entries (8388608 bytes) to map 67108864 64 bit blocks, containing a
	/// total of 4294967296 bits (memory saving: 98.4375%)
	///
	KeyBytes4 = 4,

	/// 5 bytes / 32 bits per key results in a bloom filter with a minimum
	/// memory usage of ~17GB and a maximum memory usage of ~1117GB when fully
	/// populated.
	///
	/// If you actually need this get in touch - I have some ideas for reducing
	/// the memory footprint even further.
	///
	/// When using a 64bit hash (1x5 byte keys, `k=1`) the probability of a
	/// false positive is:
	///
	/// ```text
	///           +--+----------+---------+---------+----------+---------+---------+-------+
	///         1 +                                                *                  *    +
	///           |                                  *                                     |
	///           |                         *                                              |
	///     P 0.8 +                                                                        +
	///     r     |                  *                                                     |
	///     o     |              *                                                         |
	///     b 0.6 +                                                                        +
	///     a     |          *                                                             |
	///     b     |                                                                        |
	///     i 0.4 +        *                                                               +
	///     l     |                                                                        |
	///     i     |      *                                                                 |
	///     t 0.2 +     *                                                                  +
	///     y     |    *                                                                   |
	///           |   *                                                                    |
	///         0 +  **                                                                    +
	///           +--+----------+---------+---------+----------+---------+---------+-------+
	///              0        1e+12     2e+12     3e+12      4e+12     5e+12     6e+12      
	///                                       Number of Entries                             
	/// ```
	///
	/// The probability of false positives reaches 1-in-2 after 762123384786
	/// entries.
	///
	/// An empty sparse bloom filter would require 268435456x64 bit block map
	/// entries (2147483648 bytes) to map 17179869184 64 bit blocks, containing
	/// a total of 1099511627776 bits (memory saving: 98.4375%)
	///
	KeyBytes5 = 5,
}