1use dense::DenseRegisters;
2use explicit::ExplicitStorage;
3use settings::{Settings, SettingsError};
4use sparse::SparseRegisters;
5use thiserror::Error;
6
7mod dense;
8#[cfg(test)]
9mod dense_test;
10mod explicit;
11#[cfg(test)]
12mod integration_test;
13mod settings;
14mod sparse;
15#[cfg(test)]
16mod sparse_test;
17mod utils;
18
19trait Registers {
21 fn log_2m(&self) -> u32;
22 fn pw_max_mask(&self) -> u64;
23 fn m_bits_mask(&self) -> u64;
24
25 fn set_if_greater(&mut self, reg_num: u32, value: u8);
28
29 fn indicator(&self) -> (f64, u32);
34
35 fn set(&mut self, value: u64) {
38 let substream_value = value >> self.log_2m();
49 if substream_value == 0 {
50 return;
55 }
56
57 let p_w = (1 + (substream_value | self.pw_max_mask()).trailing_zeros()) as u8;
60 let i = value & self.m_bits_mask();
62
63 self.set_if_greater(i as u32, p_w);
65 }
66}
67
68pub trait Storage {
69 fn bytes_size(&self) -> usize;
70 fn to_bytes(&self, buf: &mut [u8]);
71 fn from_bytes(settings: &Settings, buf: &[u8]) -> Self;
72 fn clear(&mut self);
73}
74
75#[derive(Clone, Debug, Error)]
76pub enum HllError {
77 #[error("{0}")]
78 Settings(#[from] SettingsError),
79 #[error("invalid version {0}")]
80 Version(u8),
81}
82
83#[derive(Clone, Debug, PartialEq)]
84pub enum Hll {
85 Empty(Settings),
86 Explicit(ExplicitStorage),
87 Sparse(SparseRegisters),
88 Dense(DenseRegisters),
89}
90
91impl Hll {
92 pub fn new(settings: Settings) -> Self {
93 Hll::Empty(settings)
94 }
95
96 pub(crate) fn add_raw(&mut self, value: u64) {
97 if value == 0 {
98 return;
99 }
100
101 if let Hll::Empty(settings) = self {
102 if settings.explicit_threshold() > 0 {
103 *self = Hll::Explicit(ExplicitStorage::with_settings(settings));
104 } else if settings.sparse_threshold.is_some() {
105 let registers = SparseRegisters::with_settings(settings);
106 *self = Hll::Sparse(registers);
107 } else {
108 *self = Hll::Dense(DenseRegisters::with_settings(settings));
109 }
110 }
111
112 match self {
113 Hll::Explicit(explicit_registers) => {
114 explicit_registers.set(value);
115 if explicit_registers.is_full() {
116 *self = explicit_registers.as_registers();
117 }
118 }
119 Hll::Sparse(sparse_registers) => {
120 sparse_registers.set(value);
121
122 if sparse_registers.is_full() {
123 *self = Hll::Dense(sparse_registers.to_dense(None));
124 }
125 }
126 Hll::Dense(dense_registers) => {
127 dense_registers.set(value);
128 }
129 _ => {}
130 }
131 }
132
133 pub fn union(&mut self, strict: bool, other: &Self) -> Result<(), HllError> {
134 if strict {
135 self.settings_check(other)?;
136 }
137
138 match self {
139 Hll::Empty(settings) => {
140 *self = match &other {
141 Hll::Sparse(sparse_registers) => match settings.sparse_threshold {
142 Some(sparse_threshold) => {
143 if sparse_threshold < sparse_registers.len() as i32 {
144 Hll::Dense(sparse_registers.to_dense(Some(settings)))
145 } else {
146 Hll::Sparse(sparse_registers.clone())
147 }
148 }
149 None => Hll::Dense(sparse_registers.to_dense(Some(settings))),
150 },
151 _ => other.clone(),
152 };
153 }
154 Hll::Explicit(lhs) => match other {
155 Hll::Empty(_settings) => {}
156 Hll::Explicit(rhs) => {
157 lhs.union_explicit(rhs);
158 }
159 Hll::Sparse(_sparse_registers) => {
160 let mut new_storage = lhs.as_registers();
161 new_storage.union(strict, other)?;
162
163 *self = new_storage;
164 }
165 Hll::Dense(_dense_registers) => {
166 let mut new_storage = lhs.as_registers();
167 new_storage.union(strict, other)?;
168
169 *self = new_storage;
170 }
171 },
172 Hll::Sparse(sparse_registers) => match other {
173 Hll::Empty(_settings) => {}
174 Hll::Explicit(explicit_storage) => {
175 sparse_registers.union_explicit(explicit_storage);
176 }
177 Hll::Sparse(rhs_sparse_registers) => {
178 sparse_registers.union_sparse(rhs_sparse_registers);
179 }
180 Hll::Dense(dense_registers) => {
181 let mut new_storage = sparse_registers.to_dense(None);
182 new_storage.union_dense(dense_registers);
183
184 *self = Hll::Dense(new_storage);
185 }
186 },
187 Hll::Dense(dense_registers) => match other {
188 Hll::Empty(_settings) => {}
189 Hll::Explicit(explicit_storage) => {
190 dense_registers.union_explicit(explicit_storage);
191 }
192 Hll::Sparse(sparse_registers) => {
193 dense_registers.union_sparse(sparse_registers);
194 }
195 Hll::Dense(rhs_dense_registers) => {
196 dense_registers.union_dense(rhs_dense_registers);
197 }
198 },
199 }
200
201 if self.is_full() {
202 self.upgrade();
203 }
204
205 Ok(())
206 }
207
208 pub fn cardinality(&self) -> u64 {
209 let (sum, num_of_zeros) = match self {
210 Hll::Empty(_) => return 0,
211 Hll::Explicit(explicit_storage) => return explicit_storage.len(),
212 Hll::Sparse(sparse_registers) => sparse_registers.indicator(),
213 Hll::Dense(dense_registers) => dense_registers.indicator(),
214 };
215
216 let settings = self.settings();
217
218 let estimator = settings.alpha_msquared / sum;
220
221 if (num_of_zeros != 0) && (estimator < settings.small_estimator_cutoff) {
222 let num_of_zeros = num_of_zeros as f64;
227 let m: f64 = (1 << settings.log_2m).into();
228 let small_estimator = m * (m / num_of_zeros).ln();
229 return small_estimator.ceil() as u64;
230 }
231
232 if estimator <= settings.large_estimator_cutoff {
233 return estimator.ceil() as u64;
234 }
235
236 let large_estimator =
241 -1.0 * settings.two_to_l * (1.0 - (estimator / settings.two_to_l)).ln();
242 large_estimator.ceil() as u64
243 }
244
245 fn is_full(&self) -> bool {
246 match self {
247 Hll::Empty(_) => false,
248 Hll::Explicit(explicit_storage) => explicit_storage.is_full(),
249 Hll::Sparse(sparse_registers) => sparse_registers.is_full(),
250 Hll::Dense(_) => false,
251 }
252 }
253
254 fn upgrade(&mut self) {
255 match self {
256 Hll::Empty(_) => {}
257 Hll::Explicit(explicit_storage) => {
258 *self = explicit_storage.as_registers();
259 }
260 Hll::Sparse(sparse_registers) => {
261 *self = Hll::Dense(sparse_registers.to_dense(None));
262 }
263 Hll::Dense(_) => {}
264 }
265 }
266
267 pub fn settings_check(&self, other: &Self) -> Result<(), SettingsError> {
268 self.settings().settings_check(other.settings())
269 }
270
271 pub fn settings(&self) -> &Settings {
272 match self {
273 Hll::Empty(settings) => settings,
274 Hll::Explicit(explicit_storage) => &explicit_storage.settings,
275 Hll::Sparse(sparse_registers) => &sparse_registers.settings,
276 Hll::Dense(dense_registers) => &dense_registers.settings,
277 }
278 }
279
280 pub fn clone_with_settings(&self, settings: &Settings) -> Self {
281 match self {
282 Hll::Empty(_) => Hll::Empty(*settings),
283 Hll::Explicit(explicit_storage) => {
284 Hll::Explicit(explicit_storage.clone_with_settings(settings))
285 }
286 Hll::Sparse(sparse_registers) => {
287 Hll::Sparse(sparse_registers.clone_with_settings(settings))
288 }
289 Hll::Dense(dense_registers) => {
290 Hll::Dense(dense_registers.clone_with_settings(settings))
291 }
292 }
293 }
294
295 pub fn type_id(&self) -> u8 {
296 match self {
297 Hll::Empty(_) => 1,
298 Hll::Explicit(_) => 2,
299 Hll::Sparse(_) => 3,
300 Hll::Dense(_) => 4,
301 }
302 }
303
304 pub fn to_bytes(&self) -> Vec<u8> {
305 let (settings, type_id, size) = match self {
306 Hll::Empty(settings) => (settings, 1, 0),
307 Hll::Explicit(explicit_storage) => {
308 (&explicit_storage.settings, 2, explicit_storage.bytes_size())
309 }
310 Hll::Sparse(sparse_registers) => {
311 (&sparse_registers.settings, 3, sparse_registers.bytes_size())
312 }
313 Hll::Dense(dense_registers) => {
314 (&dense_registers.settings, 4, dense_registers.bytes_size())
315 }
316 };
317 let mut res: Vec<u8> = vec![0; 3 + size];
318
319 res[0] = (1 << 4) | type_id;
320 res[1] = (((settings.reg_width - 1) << 5) | settings.log_2m) as u8;
321 res[2] = settings.pack_cutoff_byte();
322
323 match self {
324 Hll::Empty(_settings) => {}
325 Hll::Explicit(explicit_storage) => {
326 explicit_storage.to_bytes(&mut res[3..]);
327 }
328 Hll::Sparse(sparse_registers) => {
329 sparse_registers.to_bytes(&mut res[3..]);
330 }
331 Hll::Dense(dense_registers) => {
332 dense_registers.to_bytes(&mut res[3..]);
333 }
334 }
335
336 res
337 }
338
339 pub fn from_bytes(buf: &[u8]) -> Result<Self, HllError> {
340 let version = buf[0] >> 4;
341 let type_id = buf[0] & 0x0F;
342
343 if version != 1 {
344 return Err(HllError::Version(version));
345 }
346
347 let reg_width = (buf[1] >> 5) + 1;
348 let log_2m = buf[1] & 0x1F;
349 let (sparse_enabled, explicit_threshold) = Settings::unpack_cutoff_byte(buf[2]);
350
351 let settings = Settings::new(
352 log_2m as u32,
353 reg_width as u32,
354 explicit_threshold,
355 sparse_enabled,
356 )?;
357
358 let storage = match type_id {
359 1 => Self::Empty(settings),
360 2 => Self::Explicit(ExplicitStorage::from_bytes(&settings, &buf[3..])),
361 3 => Self::Sparse(SparseRegisters::from_bytes(&settings, &buf[3..])),
362 4 => Self::Dense(DenseRegisters::from_bytes(&settings, &buf[3..])),
363 _ => {
364 return Err(HllError::Version(type_id));
365 }
366 };
367
368 Ok(storage)
369 }
370
371 pub fn clear(&mut self) {
372 match self {
373 Hll::Empty(_) => {}
374 Hll::Explicit(explicit_storage) => explicit_storage.clear(),
375 Hll::Sparse(sparse_registers) => sparse_registers.clear(),
376 Hll::Dense(dense_registers) => dense_registers.clear(),
377 }
378 }
379}
380
381#[cfg(test)]
382mod tests {
383 use super::*;
384
385 #[test]
386 fn test_hll() {
387 let settings = Settings::new(
389 10, 4, -1, true, )
394 .unwrap();
395
396 let mut hll = Hll::new(settings);
398
399 hll.add_raw(123456789);
401 println!("Cardinality: {}", hll.cardinality()); let mut hll2 = Hll::new(settings);
405 hll2.add_raw(123456789);
406 hll2.add_raw(987654321);
407
408 hll2.union(true, &hll).unwrap();
410 println!("Cardinality after union: {}", hll2.cardinality()); let bytes = hll2.to_bytes();
414
415 let hll3 = Hll::from_bytes(&bytes).unwrap();
417 println!("Cardinality after deserialization: {}", hll3.cardinality()); }
419}