zetasketch_rs/
hyperloglogplusplus.rs

1// SPDX-FileCopyrightText: 2025 Daniel Vrátil <me@dvratil.cz>
2//
3// SPDX-License-Identifier: MIT
4//
5// Based on the original Zetasketch implementation by Google:
6// https://github.com/google/zetasketch
7// Published under the Apache License 2.0
8
9use std::collections::HashSet;
10
11use crate::{
12    aggregator::Aggregator,
13    error::SketchError,
14    hll::{
15        hash::Hash, normal_representation::NormalRepresentation, representation::Representation,
16        sparse_representation::SparseRepresentation, state::State, value_type::ValueType,
17    },
18    protos::{AggregatorStateProto, AggregatorType, DefaultOpsTypeId},
19};
20use protobuf::Message;
21
/// Type of the HLL sketch.
///
/// Identifies which kind of value an aggregator accepts. The `Display` and `Debug`
/// implementations in this file render the variants as `LONG`, `INTEGER`, `STRING` and `BYTES`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum Type {
    /// 64-bit integers; converted to `DefaultOpsTypeId::UINT64`.
    Long,
    /// 32-bit integers; converted to `DefaultOpsTypeId::UINT32`.
    Integer,
    /// UTF-8 strings; shares `DefaultOpsTypeId::BYTES_OR_UTF8_STRING` with [`Type::Bytes`].
    String,
    /// Raw byte slices; shares `DefaultOpsTypeId::BYTES_OR_UTF8_STRING` with [`Type::String`].
    Bytes,
}
30
31impl std::fmt::Display for Type {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        f.write_str(match self {
34            Type::Long => "LONG",
35            Type::Integer => "INTEGER",
36            Type::String => "STRING",
37            Type::Bytes => "BYTES",
38        })
39    }
40}
41
42impl std::fmt::Debug for Type {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        std::fmt::Display::fmt(self, f)
45    }
46}
47
48impl Type {
49    pub fn all() -> HashSet<Type> {
50        HashSet::from([Type::Long, Type::Integer, Type::String, Type::Bytes])
51    }
52
53    pub fn from_value_type(value_type: ValueType) -> Result<HashSet<Type>, SketchError> {
54        match value_type {
55            ValueType::DefaultOpsType(DefaultOpsTypeId::UINT64) => Ok(HashSet::from([Type::Long])),
56            ValueType::DefaultOpsType(DefaultOpsTypeId::UINT32) => {
57                Ok(HashSet::from([Type::Integer]))
58            }
59            ValueType::DefaultOpsType(DefaultOpsTypeId::BYTES_OR_UTF8_STRING) => {
60                Ok(HashSet::from([Type::String, Type::Bytes]))
61            }
62            _ => Err(SketchError::InvalidState(format!(
63                "Unsupported value type {value_type:?}"
64            ))),
65        }
66    }
67
68    pub fn extract_and_normalize(state: &State) -> Result<HashSet<Type>, SketchError> {
69        if state.value_type == ValueType::Unknown {
70            Ok(Type::all())
71        } else {
72            Type::from_value_type(state.value_type)
73        }
74    }
75}
76
77impl From<Type> for ValueType {
78    fn from(val: Type) -> Self {
79        match val {
80            Type::Long => ValueType::DefaultOpsType(DefaultOpsTypeId::UINT64),
81            Type::Integer => ValueType::DefaultOpsType(DefaultOpsTypeId::UINT32),
82            Type::String => ValueType::DefaultOpsType(DefaultOpsTypeId::BYTES_OR_UTF8_STRING),
83            Type::Bytes => ValueType::DefaultOpsType(DefaultOpsTypeId::BYTES_OR_UTF8_STRING),
84        }
85    }
86}
87
/// HLL++ aggregator for estimating cardinalities of multisets.
///
/// The aggregator uses the standard format for storing the internal state of the cardinality
/// estimate as defined in hllplus-unique.proto, allowing users to merge aggregators with data
/// computed in C++ or Go and to load up the cardinalities in a variety of analysis tools.
///
/// The precision defines the accuracy of the HLL++ aggregator at the cost of the memory used. The
/// upper bound on the memory required is 2<sup>precision</sup> bytes, but less memory is used for
/// smaller cardinalities (up to ~2<sup>precision - 2</sup>). The relative error is 1.04 /
/// sqrt(2<sup>precision</sup>). A typical value used at Google is 15, which gives an error of
/// about 0.6% while requiring an upper bound of 32 KiB of memory.
#[derive(Debug, Clone)]
pub struct HyperLogLogPlusPlus {
    /// Backing representation; it owns the serializable state (precisions, value type,
    /// number of values) and performs the actual add/merge/estimate work.
    representation: Representation,
    /// Types that may still be added. Starts with every candidate for the state's value type
    /// and is narrowed to a single type by the first successful add, or to the intersection
    /// by a merge.
    allowed_types: HashSet<Type>,
}
104
105impl HyperLogLogPlusPlus {
106    /// The smallest normal precision supported by this aggregator.
107    pub const MINIMUM_PRECISION: i32 = NormalRepresentation::MINIMUM_PRECISION;
108    /// The largest normal precision supported by this aggregator.
109    pub const MAXIMUM_PRECISION: i32 = NormalRepresentation::MAXIMUM_PRECISION;
110    /// The default normal precision that is used if the user does not specify a normal precision.
111    pub const DEFAULT_NORMAL_PRECISION: i32 = 15;
112    /// The largest sparse precision supported by this aggregator.
113    pub const MAXIMUM_SPARSE_PRECISION: i32 = SparseRepresentation::MAXIMUM_SPARSE_PRECISION;
114    /// Value used to indicate that the sparse representation should not be used.
115    pub const SPARSE_PRECISION_DISABLED: i32 = SparseRepresentation::SPARSE_PRECISION_DISABLED;
116    /// If no sparse precision is specified, this value is added to the normal precision to obtain
117    /// the sparse precision, which optimizes the memory-precision trade-off.
118    pub const DEFAULT_SPARSE_PRECISION_DELTA: i32 = 5;
119    /// The encoding version of the [`AggregatorStateProto`]. We only support v2.
120    pub const ENCODING_VERSION: i32 = 2;
121
122    /// Returns a new builder to customize and create a new instance of this aggregator.
123    pub fn builder() -> HyperLogLogPlusPlusBuilder {
124        HyperLogLogPlusPlusBuilder::new()
125    }
126
127    pub(crate) fn from_state(state: State) -> Result<Self, SketchError> {
128        if state.r#type != AggregatorType::HYPERLOGLOG_PLUS_UNIQUE {
129            return Err(SketchError::InvalidState(format!(
130                "Expected proto to be of type HYPERLOGLOG_PLUS_UNIQUE but was {:?}",
131                state.r#type
132            )));
133        }
134        if state.encoding_version != Self::ENCODING_VERSION {
135            return Err(SketchError::InvalidState(format!(
136                "Expected encoding version to be {:?} but was {:?}",
137                Self::ENCODING_VERSION,
138                state.encoding_version
139            )));
140        }
141        let allowed_types = Type::extract_and_normalize(&state)?;
142        Ok(Self {
143            representation: Representation::from_state(state)?,
144            allowed_types,
145        })
146    }
147
148    /// Creates a new HyperLogLog++ aggregator from the serialized `proto`.
149    ///
150    /// The `proto` must be a valid aggregator state of type [`AggregatorType::HYPERLOGLOG_PLUS_UNIQUE`].
151    pub fn from_proto(proto: AggregatorStateProto) -> Result<Self, SketchError> {
152        let bytes = proto
153            .write_to_bytes()
154            .map_err(SketchError::ProtoDeserialization)?;
155        Self::from_bytes(&bytes)
156    }
157
158    /// Creates a new HyperLogLog++ aggregator from the `bytes`.
159    ///
160    /// The `bytes` must be a valid serialized [`AggregatorStateProto`] of the type
161    /// [`AggregatorType::HYPERLOGLOG_PLUS_UNIQUE`].
162    pub fn from_bytes(bytes: &[u8]) -> Result<Self, SketchError> {
163        Self::from_state(State::parse(bytes)?)
164    }
165
166    /// Add `value` to the aggregator.
167    ///
168    /// Returns [`SketchError`] if the aggregator is of different type than `i32` or `u32`.
169    /// See [`HyperLogLogPlusPlusBuilder::build_for_u32`].
170    pub fn add_i32(&mut self, value: i32) -> Result<(), SketchError> {
171        self.check_and_set_type(Type::Integer)?;
172        self.add_hash(Hash::of_i32(value))
173    }
174
175    /// Add `value` to the aggregator.
176    ///
177    /// Returns [`SketchError`] if the aggregator is of different type than `i32` or `u32`.
178    /// See [`HyperLogLogPlusPlusBuilder::build_for_u32`].
179    pub fn add_u32(&mut self, value: u32) -> Result<(), SketchError> {
180        self.check_and_set_type(Type::Integer)?;
181        self.add_hash(Hash::of_u32(value))
182    }
183
184    /// Add `value` to the aggregator.
185    ///
186    /// Returns [`SketchError`] if the aggregator is of different type than `i64` or `u64`.
187    /// See [`HyperLogLogPlusPlusBuilder::build_for_u64`].
188    pub fn add_i64(&mut self, value: i64) -> Result<(), SketchError> {
189        self.check_and_set_type(Type::Long)?;
190        self.add_hash(Hash::of_i64(value))
191    }
192
193    /// Add `value` to the aggregator.
194    ///
195    /// Returns [`SketchError`] if the aggregator is of different type than `i64` or `u64`.
196    /// See [`HyperLogLogPlusPlusBuilder::build_for_u64`].
197    pub fn add_u64(&mut self, value: u64) -> Result<(), SketchError> {
198        self.check_and_set_type(Type::Long)?;
199        self.add_hash(Hash::of_u64(value))
200    }
201
202    /// Add `value` to the aggregator.
203    ///
204    /// Returns [`SketchError`] if the aggregator is of different type than `bytes`.
205    /// See [`HyperLogLogPlusPlusBuilder::build_for_bytes`].
206    pub fn add_bytes(&mut self, value: &[u8]) -> Result<(), SketchError> {
207        self.check_and_set_type(Type::Bytes)?;
208        self.add_hash(Hash::of_bytes(value))
209    }
210
211    /// Add `value` to the aggregator.
212    ///
213    /// Returns [`SketchError`] if the aggregator is of different type than `string`.
214    /// See [`HyperLogLogPlusPlusBuilder::build_for_string`].
215    pub fn add_string(&mut self, value: &str) -> Result<(), SketchError> {
216        self.check_and_set_type(Type::String)?;
217        self.add_hash(Hash::of_string(value))
218    }
219
220    /// Returns the normal precision of the aggregator.
221    pub fn normal_precision(&self) -> i32 {
222        self.representation.state().precision
223    }
224
225    /// Returns the sparse precision of the aggregator.
226    pub fn sparse_precision(&self) -> i32 {
227        self.representation.state().sparse_precision
228    }
229
230    fn add_hash(&mut self, hash: u64) -> Result<(), SketchError> {
231        self.representation.add_hash(hash)?;
232        self.representation.state_mut().num_values += 1;
233        Ok(())
234    }
235
236    fn check_type_and_merge(&mut self, other: HyperLogLogPlusPlus) -> Result<(), SketchError> {
237        let mut new_types = self.allowed_types.clone();
238        new_types.retain(|t| other.allowed_types.contains(t));
239        if new_types.is_empty() {
240            return Err(SketchError::InvalidState(format!(
241                "Aggregator of type {:?} is incompatible with aggregator of type {:?}",
242                self.allowed_types, other.allowed_types
243            )));
244        }
245
246        let num_values = other.representation.state().num_values;
247        self.representation.merge(other.representation)?;
248        self.representation.state_mut().num_values += num_values;
249        // Only updat the allowed  types after a successful merge
250        self.allowed_types = new_types;
251        Ok(())
252    }
253
254    fn check_and_set_type(&mut self, r#type: Type) -> Result<(), SketchError> {
255        if !self.allowed_types.contains(&r#type) {
256            return Err(SketchError::InvalidState(format!(
257                "Unable to add type {:?} to aggregator of type {:?}",
258                r#type, self.allowed_types
259            )));
260        }
261
262        // Narrow the type if necessary.
263        if self.allowed_types.len() > 1 {
264            self.allowed_types.clear();
265            self.allowed_types.insert(r#type);
266            self.representation.state_mut().value_type = r#type.into();
267        }
268        Ok(())
269    }
270}
271
272impl Aggregator<i64, HyperLogLogPlusPlus> for HyperLogLogPlusPlus {
273    fn result(&self) -> Result<i64, SketchError> {
274        self.representation.estimate()
275    }
276
277    fn merge_aggregator(&mut self, other: HyperLogLogPlusPlus) -> Result<(), SketchError> {
278        self.check_type_and_merge(other)
279    }
280
281    fn merge_proto(&mut self, proto: AggregatorStateProto) -> Result<(), SketchError> {
282        self.merge_aggregator(HyperLogLogPlusPlus::from_proto(proto)?)
283    }
284
285    fn merge_bytes(&mut self, data: &[u8]) -> Result<(), SketchError> {
286        self.merge_aggregator(HyperLogLogPlusPlus::from_bytes(data)?)
287    }
288
289    fn num_values(&self) -> u64 {
290        self.representation.state().num_values as u64
291    }
292
293    fn serialize_to_bytes(mut self) -> Result<Vec<u8>, SketchError> {
294        self.representation.compact()?;
295        self.representation.state().to_byte_array()
296    }
297
298    fn serialize_to_proto(mut self) -> Result<AggregatorStateProto, SketchError> {
299        self.representation.compact()?;
300        let bytes = self.representation.state().to_byte_array()?;
301        AggregatorStateProto::parse_from_bytes(&bytes).map_err(SketchError::ProtoDeserialization)
302    }
303}
304
/// Builder for [`HyperLogLogPlusPlus`] aggregators.
///
/// Obtained from [`HyperLogLogPlusPlus::builder`]; the precisions are only validated when one
/// of the `build_for_*` methods constructs the aggregator.
#[derive(Debug, Clone)]
pub struct HyperLogLogPlusPlusBuilder {
    /// Normal precision to build with; starts at `HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION`.
    normal_precision: i32,
    /// Explicitly requested sparse precision. `None` means it is derived from the normal
    /// precision plus `HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA` at build time.
    sparse_precision: Option<i32>,
}
310
311impl HyperLogLogPlusPlusBuilder {
312    pub(crate) fn new() -> Self {
313        Self {
314            normal_precision: HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION,
315            sparse_precision: None,
316        }
317    }
318
319    /// Sets the normal precision to be used. Must be in the range from [`HyperLogLogPlusPlus::MINIMUM_PRECISION`]
320    /// to [`HyperLogLogPlusPlus::MAXIMUM_PRECISION`] (inclusive).
321    ///
322    /// The precision defines the accuracy of the HLL++ aggregator at the cost of the memory used.
323    /// The upper bound on the memory required is 2<sup>precision</sup> bytes, but less memory is
324    /// used for smaller cardinalities (up to ~2<sup>precision - 2</sup>). The relative error is 1.04
325    /// / sqrt(2<sup>precision</sup>). If not specified, [`HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION`]` is used,
326    ///  which gives an error of about 0.6% while requiring an upper bound of 32 nbsp;KiB of memory.
327    pub fn normal_precision(mut self, normal_precision: i32) -> Self {
328        self.normal_precision = normal_precision;
329        self
330    }
331
332    /// Sets the sparse precision to be used. Must be in the range from the [`HyperLogLogPlusPlusBuilder::normal_precision`]
333    /// to [`HyperLogLogPlusPlus::MAXIMUM_SPARSE_PRECISION`] (inclusive), or [`HyperLogLogPlusPlus::SPARSE_PRECISION_DISABLED`]
334    /// to disable the use of the sparse representation. We recommend to use [`HyperLogLogPlusPlusBuilder::no_sparse_mode`]
335    /// for the latter, though.
336    ///
337    /// If not specified, the normal precision + [`HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA`] is used.
338    pub fn sparse_precision(mut self, sparse_precision: i32) -> Self {
339        self.sparse_precision = Some(sparse_precision);
340        self
341    }
342
343    /// Disable the "sparse representation" mode; i.e., the normal representation, where all
344    /// registers are explicitly stored, and its method to compute the `COUNT DISTINCT` estimate
345    /// are used from the start of the aggregation.
346    pub fn no_sparse_mode(self) -> Self {
347        self.sparse_precision(HyperLogLogPlusPlus::SPARSE_PRECISION_DISABLED)
348    }
349
350    /// Returns a new HLL++ aggregator for counting the number of unique byte arrays in a stream.
351    pub fn build_for_bytes(self) -> Result<HyperLogLogPlusPlus, SketchError> {
352        HyperLogLogPlusPlus::from_state(self.build_state(DefaultOpsTypeId::BYTES_OR_UTF8_STRING))
353    }
354
355    /// Returns a new HLL++ aggregator for counting the number of unique strings in a stream.
356    pub fn build_for_string(self) -> Result<HyperLogLogPlusPlus, SketchError> {
357        HyperLogLogPlusPlus::from_state(self.build_state(DefaultOpsTypeId::BYTES_OR_UTF8_STRING))
358    }
359
360    /// Returns a new HLL++ aggregator for counting the number of unique 32-bit integers in a stream.
361    pub fn build_for_u32(self) -> Result<HyperLogLogPlusPlus, SketchError> {
362        HyperLogLogPlusPlus::from_state(self.build_state(DefaultOpsTypeId::UINT32))
363    }
364
365    /// Returns a new HLL++ aggregator for counting the number of unique 64-bit integers in a stream.
366    pub fn build_for_u64(self) -> Result<HyperLogLogPlusPlus, SketchError> {
367        HyperLogLogPlusPlus::from_state(self.build_state(DefaultOpsTypeId::UINT64))
368    }
369
370    fn build_state(self, ops_type: DefaultOpsTypeId) -> State {
371        State {
372            r#type: AggregatorType::HYPERLOGLOG_PLUS_UNIQUE,
373            encoding_version: HyperLogLogPlusPlus::ENCODING_VERSION,
374            precision: self.normal_precision,
375            sparse_precision: match self.sparse_precision {
376                Some(precision) => precision,
377                None => self.normal_precision + HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA,
378            },
379            value_type: ValueType::DefaultOpsType(ops_type),
380            ..State::default()
381        }
382    }
383}
384
385#[cfg(test)]
386mod tests {
387    use super::*;
388    use crate::{
389        aggregator::Aggregator, // Assuming this trait might be used generally
390        error::SketchError,
391        protos::{
392            zetasketch::hllplus_unique::HyperLogLogPlusUniqueStateProto, AggregatorStateProto,
393            AggregatorType as ProtoAggregatorType, DefaultOpsTypeId as ProtoDefaultOpsTypeId,
394        },
395    };
396    use protobuf::UnknownValueRef; // For to_byte_array, parse_from_bytes
397
398    struct JavaRand {
399        seed: u64,
400    }
401
402    // Java-compatible pseudo-random number generator.
403    // This follows the exact algorithm described for java.util.Random, ensuring that our tests run with the same
404    // pseudo-random data as the Java tests, which makes debugging differences much easier.
405    impl JavaRand {
406        const MULTIPLIER: u64 = 0x5DEECE66D;
407        const MASK: u64 = (1u64 << 48) - 1;
408
409        fn initial_scramble(seed: u64) -> u64 {
410            (seed ^ Self::MULTIPLIER) & Self::MASK
411        }
412
413        pub fn new(seed: u64) -> Self {
414            Self {
415                seed: Self::initial_scramble(seed),
416            }
417        }
418
419        pub fn next(&mut self, bits: u32) -> u32 {
420            let new_seed =
421                (self.seed.wrapping_mul(0x5DEECE66D).wrapping_add(0xB)) & ((1u64 << 48) - 1);
422            self.seed = new_seed;
423            (self.seed >> (48 - bits)) as u32
424        }
425
426        fn next_int_bounded(&mut self, bound: i32) -> i32 {
427            if bound <= 0 {
428                panic!("bound must be positive");
429            }
430
431            if (bound & -bound) == bound {
432                // Power of 2 - use bit masking
433                return ((bound as i64 * self.next(31) as i64) >> 31) as i32;
434            }
435
436            // Rejection sampling to avoid bias
437            let mut bits;
438            let mut val;
439            loop {
440                bits = self.next(31) as i32;
441                val = bits % bound;
442                if bits - val + (bound - 1) >= 0 {
443                    break;
444                }
445            }
446            val
447        }
448
449        pub fn next_i64(&mut self) -> i64 {
450            let high = self.next(32) as i32;
451            let low = self.next(32) as i32;
452            ((high as i64) << 32) + (low as i64)
453        }
454    }
455
    // Normal precision used by the hand-built test protos below.
    const TEST_NORMAL_PRECISION: i32 = HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION; // 15
    // Default sparse precision derived from the normal precision. Note that the helpers below
    // hard-code a sparse precision of 25 instead, mirroring the Java tests.
    const TEST_SPARSE_PRECISION: i32 =
        TEST_NORMAL_PRECISION + HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA; // 20; the Java tests sometimes use 25
459
460    // Helper for default builder from Java tests (sparsePrecision 25)
461    fn hll_builder_java_default_sparse() -> HyperLogLogPlusPlusBuilder {
462        HyperLogLogPlusPlus::builder().sparse_precision(25)
463    }
464
465    // Helper to create AggregatorStateProto for BYTES_OR_UTF8_STRING type
466    fn byte_or_string_type_state_proto_helper() -> AggregatorStateProto {
467        let mut hll_unique_proto = HyperLogLogPlusUniqueStateProto::new();
468        hll_unique_proto.set_precision_or_num_buckets(TEST_NORMAL_PRECISION);
469        hll_unique_proto.set_sparse_precision_or_num_buckets(25); // As in Java test
470
471        let mut proto = AggregatorStateProto::new();
472        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
473        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
474        proto.set_num_values(0);
475
476        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::BYTES_OR_UTF8_STRING);
477        proto.set_value_type(vt.into());
478
479        set_hll_extension(&mut proto, hll_unique_proto);
480        proto
481    }
482
483    // Helper to create AggregatorStateProto for UNKNOWN type
484    fn unknown_type_state_proto_helper() -> AggregatorStateProto {
485        let mut hll_unique_proto = HyperLogLogPlusUniqueStateProto::new();
486        hll_unique_proto.set_precision_or_num_buckets(TEST_NORMAL_PRECISION);
487        hll_unique_proto.set_sparse_precision_or_num_buckets(25); // As in Java test
488
489        let mut proto = AggregatorStateProto::new();
490        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
491        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
492        proto.set_num_values(0);
493        // No ValueTypeInfo for UNKNOWN type
494
495        set_hll_extension(&mut proto, hll_unique_proto);
496        proto
497    }
498
499    fn get_hll_extension(proto: &AggregatorStateProto) -> HyperLogLogPlusUniqueStateProto {
500        let ext_data = proto
501            .unknown_fields()
502            .get(112)
503            .expect("HLL extension not found");
504
505        match ext_data {
506            UnknownValueRef::LengthDelimited(data) => {
507                HyperLogLogPlusUniqueStateProto::parse_from_bytes(data)
508                    .expect("Failed to parse HLL extension")
509            }
510            _ => panic!("Unexpected extension type: {ext_data:?}"),
511        }
512    }
513
514    fn set_hll_extension(
515        proto: &mut AggregatorStateProto,
516        hll_ext: HyperLogLogPlusUniqueStateProto,
517    ) {
518        proto
519            .mut_unknown_fields()
520            .add_length_delimited(112, hll_ext.write_to_bytes().unwrap());
521    }
522
    // Merging many small sparse sketches must produce exactly the same serialized state as
    // adding all of their values to a single aggregator directly.
    //
    // The random draws below are order-sensitive: every value is taken from one shared
    // generator, so reordering any `random` call changes the data set.
    #[test]
    fn test_merge_multiple_sparse_representations_into_a_normal_one() {
        let normal_precision = 13;
        let sparse_precision = 16;
        let hll_builder = HyperLogLogPlusPlus::builder()
            .normal_precision(normal_precision)
            .sparse_precision(sparse_precision);

        let num_sketches = 100;
        let mut random = JavaRand::new(123);

        // Serialized individual sketches, plus one aggregator that sees every value directly.
        let mut agg_state_protos: Vec<AggregatorStateProto> = Vec::new();
        let mut overall_aggregator = hll_builder
            .clone()
            .build_for_u64()
            .expect("Failed to build overall_aggregator");

        for _i in 0..num_sketches {
            // Each individual sketch receives between 1 and 2^(normal_precision - 1) values.
            let max = (1 << normal_precision) / 2;
            let num_values = random.next_int_bounded(max) + 1;

            let mut aggregator = hll_builder
                .clone()
                .build_for_u64()
                .expect("Failed to build aggregator");

            for _k in 0..num_values {
                let value = random.next_i64() as u64;
                aggregator.add_u64(value).unwrap_or_else(|_| {
                    panic!("Failed to add value {value} to aggregator (i={_i}, k={_k})")
                });
                overall_aggregator
                    .add_u64(value)
                    .expect("Failed to add value to overall_aggregator");
            }

            // The individual sketch is expected to still be in the sparse representation.
            let proto = aggregator
                .serialize_to_proto()
                .expect("Failed to serialize aggregator");
            let hll_ext = get_hll_extension(&proto);
            assert!(
                !hll_ext.sparse_data().is_empty(),
                "Expected sparse data for individual sketch"
            );
            assert!(
                hll_ext.data().is_empty(),
                "Expected no normal data for individual sparse sketch"
            );
            agg_state_protos.push(proto);
        }

        // The aggregator that saw everything is expected to use the normal representation.
        let expected_proto = overall_aggregator
            .serialize_to_proto()
            .expect("Failed to serialize overall_aggregator");
        let overall_hll_ext = get_hll_extension(&expected_proto);
        assert!(
            overall_hll_ext.sparse_data().is_empty(),
            "Expected no sparse data for overall sketch"
        );
        assert!(
            !overall_hll_ext.data().is_empty(),
            "Expected normal data for overall sketch"
        );

        // Start from the first serialized sketch and merge all remaining ones into it.
        let mut merged_aggregator = HyperLogLogPlusPlus::from_proto(agg_state_protos[0].clone())
            .expect("Failed to build merged_aggregator from proto");
        for agg_proto in agg_state_protos.iter().skip(1) {
            merged_aggregator
                .merge_proto(agg_proto.clone())
                .expect("Failed to merge proto");
        }

        assert_eq!(
            merged_aggregator
                .serialize_to_proto()
                .expect("Serialize failed"),
            expected_proto
        );
    }
602
603    #[test]
604    fn add_bytes() {
605        let mut aggregator = hll_builder_java_default_sparse()
606            .build_for_bytes()
607            .expect("build failed");
608        aggregator.add_bytes(&[12]).expect("add_bytes failed");
609        assert_eq!(aggregator.result().expect("result failed"), 1);
610        assert_eq!(aggregator.num_values(), 1);
611    }
612
613    #[test]
614    fn add_bytes_throws_when_other_type() {
615        let mut aggregator = hll_builder_java_default_sparse()
616            .build_for_u64()
617            .expect("build failed"); // Build for Longs
618        let result = aggregator.add_bytes(&[12]);
619        assert!(result.is_err());
620        if let Err(SketchError::InvalidState(msg)) = result {
621            assert!(msg.contains("Unable to add type BYTES to aggregator of type {LONG}"));
622        } else {
623            panic!("Unexpected error type: {result:?}");
624        }
625    }
626
627    #[test]
628    fn add_bytes_to_byte_or_string_type() {
629        let mut aggregator =
630            HyperLogLogPlusPlus::from_proto(byte_or_string_type_state_proto_helper())
631                .expect("from_proto failed");
632        aggregator.add_bytes(&[12]).expect("add_bytes failed"); // First add sets the type to BYTES
633
634        let result = aggregator.add_string("foo"); // Second add with different type (STRING)
635        assert!(result.is_err());
636        if let Err(SketchError::InvalidState(msg)) = result {
637            // Type is now fixed to BYTES
638            assert!(msg.contains("Unable to add type STRING to aggregator of type {BYTES}"));
639        } else {
640            panic!("Unexpected error type: {result:?}");
641        }
642    }
643
644    #[test]
645    fn add_bytes_to_uninitialized() {
646        let mut aggregator = HyperLogLogPlusPlus::from_proto(unknown_type_state_proto_helper())
647            .expect("from_proto failed");
648        aggregator.add_bytes(&[12]).expect("add_bytes failed"); // First add sets type to BYTES
649
650        let result = aggregator.add_u64(42); // Try adding Long
651        assert!(result.is_err());
652        if let Err(SketchError::InvalidState(msg)) = result {
653            assert!(msg.contains("Unable to add type LONG to aggregator of type {BYTES}"));
654        } else {
655            panic!("Unexpected error type: {result:?}");
656        }
657    }
658
659    #[test]
660    fn add_integer() {
661        // u32 in Rust
662        let mut aggregator = hll_builder_java_default_sparse()
663            .build_for_u32()
664            .expect("build failed");
665        aggregator.add_u32(1).expect("add_u32 failed");
666        assert_eq!(aggregator.result().expect("result failed"), 1);
667        assert_eq!(aggregator.num_values(), 1);
668    }
669
670    #[test]
671    fn add_integer_throws_when_other_type() {
672        let mut aggregator = hll_builder_java_default_sparse()
673            .build_for_u64()
674            .expect("build failed"); // Build for Longs
675        let result = aggregator.add_u32(1); // Try adding Integer
676        assert!(result.is_err());
677        if let Err(SketchError::InvalidState(msg)) = result {
678            assert!(msg.contains("Unable to add type INTEGER to aggregator of type {LONG}"));
679        } else {
680            panic!("Unexpected error type: {result:?}");
681        }
682    }
683
684    #[test]
685    fn add_integer_to_uninitialized() {
686        let mut aggregator = HyperLogLogPlusPlus::from_proto(unknown_type_state_proto_helper())
687            .expect("from_proto failed");
688        aggregator.add_u32(42).expect("add_u32 failed"); // First add sets type to INTEGER
689
690        let result = aggregator.add_u64(42); // Try adding Long
691        assert!(result.is_err());
692        if let Err(SketchError::InvalidState(msg)) = result {
693            assert!(msg.contains("Unable to add type LONG to aggregator of type {INTEGER}"));
694        } else {
695            panic!("Unexpected error type: {result:?}");
696        }
697    }
698
699    #[test]
700    fn add_long() {
701        // u64 in Rust
702        let mut aggregator = hll_builder_java_default_sparse()
703            .build_for_u64()
704            .expect("build failed");
705        aggregator.add_u64(1).expect("add_u64 failed");
706        assert_eq!(aggregator.result().expect("result failed"), 1);
707        assert_eq!(aggregator.num_values(), 1);
708    }
709
710    #[test]
711    fn add_long_throws_when_other_type() {
712        let mut aggregator = hll_builder_java_default_sparse()
713            .build_for_u32()
714            .expect("build failed"); // Build for Integer
715        let result = aggregator.add_u64(1); // Try adding Long
716        assert!(result.is_err());
717        if let Err(SketchError::InvalidState(msg)) = result {
718            assert!(msg.contains("Unable to add type LONG to aggregator of type {INTEGER}"));
719        } else {
720            panic!("Unexpected error type: {result:?}");
721        }
722    }
723
724    #[test]
725    fn add_long_to_uninitialized() {
726        let mut aggregator = HyperLogLogPlusPlus::from_proto(unknown_type_state_proto_helper())
727            .expect("from_proto failed");
728        aggregator.add_u64(42).expect("add_u64 failed"); // First add sets type to LONG
729
730        let result = aggregator.add_u32(42); // Try adding Integer
731        assert!(result.is_err());
732        if let Err(SketchError::InvalidState(msg)) = result {
733            assert!(msg.contains("Unable to add type INTEGER to aggregator of type {LONG}"));
734        } else {
735            panic!("Unexpected error type: {result:?}");
736        }
737    }
738
739    #[test]
740    fn add_string() {
741        let mut aggregator = hll_builder_java_default_sparse()
742            .build_for_string()
743            .expect("build failed");
744        aggregator.add_string("foo").expect("add_string failed");
745        assert_eq!(aggregator.result().expect("result failed"), 1);
746        assert_eq!(aggregator.num_values(), 1);
747    }
748
749    #[test]
750    fn add_string_to_byte_or_string_type() {
751        let mut aggregator =
752            HyperLogLogPlusPlus::from_proto(byte_or_string_type_state_proto_helper())
753                .expect("from_proto failed");
754        aggregator.add_string("foo").expect("add_string failed"); // First add sets type to STRING
755
756        let result = aggregator.add_bytes(&[1]); // Second add with different type (BYTES)
757        assert!(result.is_err());
758        if let Err(SketchError::InvalidState(msg)) = result {
759            assert!(msg.contains("Unable to add type BYTES to aggregator of type {STRING}"));
760        } else {
761            panic!("Unexpected error type: {result:?}");
762        }
763    }
764
765    #[test]
766    fn add_string_to_uninitialized() {
767        let mut aggregator = HyperLogLogPlusPlus::from_proto(unknown_type_state_proto_helper())
768            .expect("from_proto failed");
769        aggregator.add_string("foo").expect("add_string failed"); // First add sets type to STRING
770
771        let result = aggregator.add_u32(42); // Try adding Integer
772        assert!(result.is_err());
773        if let Err(SketchError::InvalidState(msg)) = result {
774            assert!(msg.contains("Unable to add type INTEGER to aggregator of type {STRING}"));
775        } else {
776            panic!("Unexpected error type: {result:?}");
777        }
778    }
779
780    #[test]
781    fn create_throws_when_precision_too_large() {
782        let result = HyperLogLogPlusPlus::builder()
783            .normal_precision(HyperLogLogPlusPlus::MAXIMUM_PRECISION + 1)
784            .sparse_precision(25) // valid sparse_p
785            .build_for_u32();
786        assert!(result.is_err());
787        if let Err(SketchError::IllegalArgument(msg)) = result {
788            assert!(msg.contains(&format!(
789                "Expected normal precision to be >= {} and <= {} but was {}",
790                HyperLogLogPlusPlus::MINIMUM_PRECISION,
791                HyperLogLogPlusPlus::MAXIMUM_PRECISION,
792                HyperLogLogPlusPlus::MAXIMUM_PRECISION + 1
793            )));
794        } else {
795            panic!("Unexpected error type or message: {result:?}");
796        }
797    }
798
799    #[test]
800    fn create_throws_when_precision_too_small() {
801        let result = HyperLogLogPlusPlus::builder()
802            .normal_precision(HyperLogLogPlusPlus::MINIMUM_PRECISION - 1)
803            .sparse_precision(25) // valid sparse_p
804            .build_for_u32();
805        assert!(result.is_err());
806        if let Err(SketchError::IllegalArgument(msg)) = result {
807            assert!(msg.contains(&format!(
808                "Expected normal precision to be >= {} and <= {} but was {}",
809                HyperLogLogPlusPlus::MINIMUM_PRECISION,
810                HyperLogLogPlusPlus::MAXIMUM_PRECISION,
811                HyperLogLogPlusPlus::MINIMUM_PRECISION - 1
812            )));
813        } else {
814            panic!("Unexpected error type or message: {result:?}");
815        }
816    }
817
818    #[test]
819    fn from_proto_fails_when_no_extension() {
820        let mut proto = AggregatorStateProto::new();
821        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
822        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
823        proto.set_num_values(0);
824        // No HLL unique extension set
825
826        let result = HyperLogLogPlusPlus::from_proto(proto)
827            .expect_err("HLL should fail to load when extension is missing");
828        if let SketchError::IllegalArgument(msg) = result {
829            assert!(msg.contains("Expected normal precision to be >= 10 and <= 24 but was 0"));
830        } else {
831            panic!("Unexpected error type: {result:?}");
832        }
833    }
834
835    #[test]
836    fn from_proto_fails_when_normal_precision_too_large() {
837        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
838        hll_state.set_precision_or_num_buckets(HyperLogLogPlusPlus::MAXIMUM_PRECISION + 1);
839        // sparse precision default or valid
840        hll_state.set_sparse_precision_or_num_buckets(
841            HyperLogLogPlusPlus::MAXIMUM_PRECISION
842                + 1
843                + HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA,
844        );
845
846        let mut proto = AggregatorStateProto::new();
847        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
848        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
849        proto.set_num_values(0);
850        set_hll_extension(&mut proto, hll_state);
851
852        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
853        proto.set_value_type(vt.into());
854
855        let result = HyperLogLogPlusPlus::from_proto(proto)
856            .expect_err("HLL should fail to load when normal precision is too large");
857        if let SketchError::IllegalArgument(msg) = result {
858            assert!(msg.contains(&format!(
859                "Expected normal precision to be >= {} and <= {} but was {}",
860                HyperLogLogPlusPlus::MINIMUM_PRECISION,
861                HyperLogLogPlusPlus::MAXIMUM_PRECISION,
862                HyperLogLogPlusPlus::MAXIMUM_PRECISION + 1
863            )));
864        } else {
865            panic!("Unexpected error type or message: {result:?}");
866        }
867    }
868
869    #[test]
870    fn from_proto_fails_when_normal_precision_too_small() {
871        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
872        hll_state.set_precision_or_num_buckets(HyperLogLogPlusPlus::MINIMUM_PRECISION - 1);
873        hll_state.set_sparse_precision_or_num_buckets(TEST_SPARSE_PRECISION);
874
875        let mut proto = AggregatorStateProto::new();
876        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
877        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
878        proto.set_num_values(0);
879        set_hll_extension(&mut proto, hll_state);
880
881        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
882        proto.set_value_type(vt.into());
883
884        let result = HyperLogLogPlusPlus::from_proto(proto)
885            .expect_err("HLL should fail to load when normal precision is too small");
886        if let SketchError::IllegalArgument(msg) = result {
887            assert!(msg.contains(&format!(
888                "Expected normal precision to be >= {} and <= {} but was {}",
889                HyperLogLogPlusPlus::MINIMUM_PRECISION,
890                HyperLogLogPlusPlus::MAXIMUM_PRECISION,
891                HyperLogLogPlusPlus::MINIMUM_PRECISION - 1
892            )));
893        } else {
894            panic!("Unexpected error type or message: {result:?}");
895        }
896    }
897
898    #[test]
899    fn from_proto_fails_when_not_hyperloglogplusplus() {
900        let mut proto = AggregatorStateProto::new();
901        proto.set_type(ProtoAggregatorType::SUM); // Incorrect type
902        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
903        proto.set_num_values(0);
904
905        // Extension might not matter or be absent, but main type is wrong
906        let result = HyperLogLogPlusPlus::from_proto(proto)
907            .expect_err("HLL should fail to load when invalid type is set");
908        if let SketchError::InvalidState(msg) = result {
909            assert!(
910                msg.contains("Expected proto to be of type HYPERLOGLOG_PLUS_UNIQUE but was SUM")
911            );
912        } else {
913            panic!("Unexpected error type: {result:?}");
914        }
915    }
916
917    // Test fromProto_ThrowsWhenSparseIsMissingSparsePrecision from Java
918    // In Rust, if sparse_data is set, sparse_precision must be valid (not 0).
919    // SparseRepresentation::new checks if sparse_precision is 0 and errors.
920    // State::from_hll_proto: if sparse_precision is 0 but sparse_data is present, it might error or become normal.
921    // Current Rust code: Representation::from_state checks if sparse_precision != DISABLED and sparse_data is not empty
922    // for it to be sparse. If sparse_precision is 0 (DISABLED), it becomes Normal.
923    // Java test: sparse data is set, but sparse precision is 0. This is an invalid state.
924    #[test]
925    fn from_proto_fails_when_sparse_is_missing_sparse_precision() {
926        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
927        hll_state.set_precision_or_num_buckets(TEST_NORMAL_PRECISION);
928        hll_state.set_sparse_precision_or_num_buckets(0); // Missing or disabled sparse precision
929        hll_state.set_sparse_data(vec![1]); // But sparse data is present
930
931        let mut proto = AggregatorStateProto::new();
932        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
933        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
934        proto.set_num_values(0);
935        set_hll_extension(&mut proto, hll_state);
936
937        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
938        proto.set_value_type(vt.into());
939
940        let result = HyperLogLogPlusPlus::from_proto(proto)
941            .expect_err("HLL should fail to load when sparse precision is missing");
942        if let SketchError::InvalidState(msg) = result {
943            assert!(msg.contains("Must have a sparse precision when sparse data is set"));
944        } else {
945            panic!("Unexpected error type: {result:?}");
946        }
947    }
948
949    #[test]
950    fn from_proto_fails_when_sparse_precision_too_large() {
951        let normal_p = 15;
952        let sparse_p = HyperLogLogPlusPlus::MAXIMUM_SPARSE_PRECISION + 1; // 26, too large
953
954        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
955        hll_state.set_precision_or_num_buckets(normal_p);
956        hll_state.set_sparse_precision_or_num_buckets(sparse_p);
957
958        let mut proto = AggregatorStateProto::new();
959        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
960        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
961        proto.set_num_values(0);
962        set_hll_extension(&mut proto, hll_state);
963
964        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
965        proto.set_value_type(vt.into());
966
967        let result = HyperLogLogPlusPlus::from_proto(proto)
968            .expect_err("HLL should fail to load when sparse precision is too large");
969        if let SketchError::IllegalArgument(msg) = result {
970            assert!(msg.contains(&format!(
971                "Expected sparse precision to be >= normal precision ({}) and <= {} but was {}.",
972                normal_p,
973                HyperLogLogPlusPlus::MAXIMUM_SPARSE_PRECISION,
974                sparse_p
975            )));
976        } else {
977            panic!("Unexpected error type or message: {result:?}");
978        }
979    }
980
981    #[test]
982    fn from_proto_fails_when_sparse_precision_too_small() {
983        let normal_p = 15;
984        let sparse_p = normal_p - 1; // 14, too small (must be >= normal_p)
985
986        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
987        hll_state.set_precision_or_num_buckets(normal_p);
988        hll_state.set_sparse_precision_or_num_buckets(sparse_p);
989
990        let mut proto = AggregatorStateProto::new();
991        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
992        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
993        proto.set_num_values(0);
994        set_hll_extension(&mut proto, hll_state);
995
996        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
997        proto.set_value_type(vt.into());
998
999        let result = HyperLogLogPlusPlus::from_proto(proto)
1000            .expect_err("HLL should fail to load when sparse precision is too small");
1001        if let SketchError::IllegalArgument(msg) = result {
1002            assert!(msg.contains(&format!(
1003                "Expected sparse precision to be >= normal precision ({}) and <= {} but was {}.",
1004                normal_p,
1005                HyperLogLogPlusPlus::MAXIMUM_SPARSE_PRECISION,
1006                sparse_p
1007            )));
1008        } else {
1009            panic!("Unexpected error type or message: {result:?}");
1010        }
1011    }
1012
1013    #[test]
1014    fn from_proto_when_normal() {
1015        let normal_p = 15;
1016        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
1017        hll_state.set_precision_or_num_buckets(normal_p);
1018        // No sparse_precision explicitly set, or set to 0 for normal.
1019        // If sparse_precision is not set, State::from_hll_proto uses normal_p + DELTA
1020        // To force normal, sparse_precision should be 0 OR data field set.
1021        hll_state.set_sparse_precision_or_num_buckets(0); // Mark as normal
1022        hll_state.set_data(vec![0; 1 << normal_p]); // Normal data
1023
1024        let mut proto = AggregatorStateProto::new();
1025        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
1026        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
1027        proto.set_num_values(1);
1028        set_hll_extension(&mut proto, hll_state);
1029
1030        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
1031        proto.set_value_type(vt.into());
1032
1033        let aggregator =
1034            HyperLogLogPlusPlus::from_proto(proto).expect("from_proto failed for normal");
1035        // Estimate for all zeros data is 0 (or close to it)
1036        assert!(aggregator.result().expect("result failed") >= 0); // Exact estimate is complex for all-zero data
1037        assert_eq!(aggregator.num_values(), 1);
1038        assert!(aggregator.representation.is_normal());
1039    }
1040
1041    #[test]
1042    fn from_proto_when_sparse() {
1043        let normal_p = 15;
1044        let sparse_p = 25;
1045        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
1046        hll_state.set_precision_or_num_buckets(normal_p);
1047        hll_state.set_sparse_precision_or_num_buckets(sparse_p);
1048        hll_state.set_sparse_data(vec![1]); // Sparse data
1049        hll_state.set_sparse_size(1); // From Java test
1050
1051        let mut proto = AggregatorStateProto::new();
1052        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
1053        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
1054        proto.set_num_values(2); // From Java test
1055        set_hll_extension(&mut proto, hll_state);
1056
1057        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
1058        proto.set_value_type(vt.into());
1059
1060        let aggregator =
1061            HyperLogLogPlusPlus::from_proto(proto).expect("from_proto failed for sparse");
1062        assert_eq!(aggregator.result().expect("result failed"), 1); // Java test expects 1
1063        assert_eq!(aggregator.num_values(), 2);
1064        assert!(aggregator.representation.is_sparse());
1065    }
1066
1067    #[test]
1068    fn from_proto_byte_array() {
1069        let normal_p = 15;
1070        let sparse_p = 25;
1071        let mut hll_state = HyperLogLogPlusUniqueStateProto::new();
1072        hll_state.set_precision_or_num_buckets(normal_p);
1073        hll_state.set_sparse_precision_or_num_buckets(sparse_p);
1074        hll_state.set_sparse_data(vec![1]);
1075        hll_state.set_sparse_size(1);
1076
1077        let mut proto = AggregatorStateProto::new();
1078        proto.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
1079        proto.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
1080        proto.set_num_values(2);
1081        set_hll_extension(&mut proto, hll_state);
1082
1083        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
1084        proto.set_value_type(vt.into());
1085
1086        let byte_array = proto.write_to_bytes().expect("write_to_bytes failed");
1087        let aggregator = HyperLogLogPlusPlus::from_bytes(&byte_array).expect("from_bytes failed");
1088
1089        assert_eq!(aggregator.result().expect("result failed"), 1);
1090        assert_eq!(aggregator.num_values(), 2);
1091    }
1092
1093    #[test]
1094    fn from_proto_byte_array_throws_when_invalid() {
1095        let result = HyperLogLogPlusPlus::from_bytes(&[1, 2, 3]); // Invalid proto data
1096        assert!(result.is_err());
1097        if let Err(SketchError::ProtoDeserialization(_)) = result {
1098            // Correct error type
1099        } else {
1100            panic!("Unexpected error type: {result:?}");
1101        }
1102    }
1103
1104    #[test]
1105    fn long_result_simple() {
1106        let mut aggregator = hll_builder_java_default_sparse()
1107            .build_for_u32()
1108            .expect("build failed");
1109        aggregator.add_u32(1).expect("add failed");
1110        aggregator.add_u32(2).expect("add failed");
1111        aggregator.add_u32(3).expect("add failed");
1112        aggregator.add_u32(2).expect("add failed"); // Duplicate
1113        aggregator.add_u32(3).expect("add failed"); // Duplicate
1114        assert_eq!(aggregator.result().expect("result failed"), 3);
1115    }
1116
1117    #[test]
1118    fn long_result_zero_when_empty() {
1119        let aggregator = hll_builder_java_default_sparse()
1120            .build_for_u32()
1121            .expect("build failed");
1122        assert_eq!(aggregator.result().expect("result failed"), 0);
1123    }
1124
1125    #[test]
1126    fn merge_from_proto() {
1127        let mut aggregator = hll_builder_java_default_sparse()
1128            .build_for_u32()
1129            .expect("build failed");
1130
1131        let mut hll_state_to_merge = HyperLogLogPlusUniqueStateProto::new();
1132        hll_state_to_merge.set_precision_or_num_buckets(TEST_NORMAL_PRECISION);
1133        hll_state_to_merge.set_sparse_precision_or_num_buckets(25); // Matching sparse precision
1134        hll_state_to_merge.set_sparse_data(vec![1]);
1135        hll_state_to_merge.set_sparse_size(1);
1136
1137        let mut proto_to_merge = AggregatorStateProto::new();
1138        proto_to_merge.set_type(ProtoAggregatorType::HYPERLOGLOG_PLUS_UNIQUE);
1139        proto_to_merge.set_encoding_version(HyperLogLogPlusPlus::ENCODING_VERSION);
1140        proto_to_merge.set_num_values(2);
1141        set_hll_extension(&mut proto_to_merge, hll_state_to_merge);
1142
1143        let vt = ValueType::DefaultOpsType(ProtoDefaultOpsTypeId::UINT32);
1144        proto_to_merge.set_value_type(vt.into());
1145
1146        aggregator
1147            .merge_proto(proto_to_merge)
1148            .expect("merge_proto failed");
1149        assert_eq!(aggregator.result().expect("result failed"), 1);
1150        assert_eq!(aggregator.num_values(), 2); // Num values should be sum
1151    }
1152
1153    #[test]
1154    fn merge_normal_into_normal_with_higher_precision() {
1155        let mut a = HyperLogLogPlusPlus::builder()
1156            .no_sparse_mode() // Uses MAX_SPARSE_P, effectively sparse but test means "normal rep"
1157            .build_for_u32()
1158            .expect("Build A failed");
1159
1160        a.add_u32(1).unwrap();
1161        a.add_u32(2).unwrap();
1162        a.add_u32(3).unwrap();
1163
1164        let mut b = HyperLogLogPlusPlus::builder()
1165            .normal_precision(13) // Higher precision
1166            .no_sparse_mode()
1167            .build_for_u32()
1168            .expect("Build B failed");
1169        b.add_u32(3).unwrap();
1170        b.add_u32(4).unwrap();
1171
1172        a.merge_aggregator(b).expect("Merge failed");
1173
1174        assert_eq!(a.normal_precision(), 13);
1175        assert_eq!(a.sparse_precision(), 0);
1176        assert_eq!(a.result().unwrap(), 4);
1177        assert_eq!(a.num_values(), 5);
1178        //assert_eq!(b.result().unwrap(), 2);
1179        //assert_eq!(b.num_values(), 2);
1180    }
1181
1182    #[test]
1183    fn num_values_simple() {
1184        let mut aggregator = hll_builder_java_default_sparse()
1185            .build_for_u32()
1186            .expect("build failed");
1187        aggregator.add_u32(1).unwrap();
1188        aggregator.add_u32(2).unwrap();
1189        aggregator.add_u32(3).unwrap();
1190        aggregator.add_u32(2).unwrap();
1191        aggregator.add_u32(3).unwrap();
1192        assert_eq!(aggregator.num_values(), 5);
1193    }
1194
1195    #[test]
1196    fn num_values_zero_when_empty() {
1197        let aggregator = hll_builder_java_default_sparse()
1198            .build_for_u32()
1199            .expect("build failed");
1200        assert_eq!(aggregator.num_values(), 0);
1201    }
1202
1203    #[test]
1204    fn serialize_to_proto_empty_aggregator_sets_empty_sparse_data_field() {
1205        let aggregator = HyperLogLogPlusPlus::builder()
1206            .normal_precision(13)
1207            .sparse_precision(16)
1208            .build_for_bytes()
1209            .expect("Build failed");
1210
1211        let actual_proto = aggregator.serialize_to_proto().expect("Serialize failed");
1212        let hll_ext = get_hll_extension(&actual_proto);
1213
1214        assert!(hll_ext.has_sparse_data()); // Field should be present
1215        assert!(hll_ext.sparse_data().is_empty()); // And its value empty
1216        assert!(!hll_ext.has_data() || hll_ext.data().is_empty()); // Normal data should not be present or empty
1217    }
1218
1219    #[test]
1220    fn builder_uses_both_precision_defaults_when_unspecified() {
1221        let aggregator = HyperLogLogPlusPlus::builder()
1222            .build_for_string()
1223            .expect("Build failed");
1224        assert_eq!(
1225            aggregator.normal_precision(),
1226            HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION
1227        );
1228        assert_eq!(
1229            aggregator.sparse_precision(),
1230            HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION
1231                + HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA
1232        );
1233    }
1234
1235    #[test]
1236    fn builder_uses_normal_precision_default_when_unspecified() {
1237        let aggregator = HyperLogLogPlusPlus::builder()
1238            .sparse_precision(18)
1239            .build_for_u32()
1240            .expect("Build failed");
1241        assert_eq!(
1242            aggregator.normal_precision(),
1243            HyperLogLogPlusPlus::DEFAULT_NORMAL_PRECISION
1244        );
1245        assert_eq!(aggregator.sparse_precision(), 18);
1246    }
1247
1248    #[test]
1249    fn builder_uses_sparse_precision_default_when_unspecified() {
1250        let aggregator = HyperLogLogPlusPlus::builder()
1251            .normal_precision(18)
1252            .build_for_u64()
1253            .expect("Build failed");
1254        assert_eq!(aggregator.normal_precision(), 18);
1255        assert_eq!(
1256            aggregator.sparse_precision(),
1257            18 + HyperLogLogPlusPlus::DEFAULT_SPARSE_PRECISION_DELTA
1258        );
1259    }
1260
1261    #[test]
1262    fn builder_uses_both_precisions_as_specified() {
1263        let aggregator = HyperLogLogPlusPlus::builder()
1264            .normal_precision(14)
1265            .sparse_precision(17)
1266            .build_for_bytes()
1267            .expect("Build failed");
1268        assert_eq!(aggregator.normal_precision(), 14);
1269        assert_eq!(aggregator.sparse_precision(), 17);
1270    }
1271
1272    #[test]
1273    fn builder_invocation_order_does_not_matter() {
1274        let aggregator = HyperLogLogPlusPlus::builder()
1275            .sparse_precision(17)
1276            .normal_precision(14)
1277            .build_for_bytes()
1278            .expect("Build failed");
1279        assert_eq!(aggregator.normal_precision(), 14);
1280        assert_eq!(aggregator.sparse_precision(), 17);
1281    }
1282
1283    #[test]
1284    fn builder_no_sparse_mode_behavior() {
1285        let aggregator = HyperLogLogPlusPlus::builder()
1286            .no_sparse_mode()
1287            .normal_precision(16)
1288            .build_for_bytes()
1289            .expect("Build failed");
1290
1291        assert_eq!(aggregator.sparse_precision(), 0);
1292        assert_eq!(aggregator.normal_precision(), 16);
1293        assert!(aggregator.representation.is_normal());
1294    }
1295
1296    #[test]
1297    fn builder_reuse() {
1298        let mut hll_builder = HyperLogLogPlusPlus::builder()
1299            .normal_precision(13)
1300            .sparse_precision(16);
1301
1302        let mut bytes_aggregator = hll_builder
1303            .clone()
1304            .build_for_bytes()
1305            .expect("Build bytes failed");
1306        bytes_aggregator.add_bytes(&[12]).unwrap();
1307        assert_eq!(bytes_aggregator.result().unwrap(), 1);
1308        assert_eq!(bytes_aggregator.num_values(), 1);
1309        assert_eq!(bytes_aggregator.normal_precision(), 13);
1310        assert_eq!(bytes_aggregator.sparse_precision(), 16);
1311
1312        let mut longs_aggregator = hll_builder
1313            .clone()
1314            .build_for_u64()
1315            .expect("Build longs failed");
1316        longs_aggregator.add_u64(1).unwrap();
1317        assert_eq!(longs_aggregator.result().unwrap(), 1);
1318        assert_eq!(longs_aggregator.num_values(), 1);
1319        assert_eq!(longs_aggregator.normal_precision(), 13);
1320        assert_eq!(longs_aggregator.sparse_precision(), 16);
1321
1322        // Change precisions on the builder
1323        hll_builder = hll_builder.sparse_precision(20).normal_precision(18);
1324
1325        let mut string_aggregator = hll_builder.build_for_string().expect("Build string failed");
1326        string_aggregator.add_string("foo").unwrap();
1327        assert_eq!(string_aggregator.result().unwrap(), 1);
1328        assert_eq!(string_aggregator.num_values(), 1);
1329        assert_eq!(string_aggregator.normal_precision(), 18);
1330        assert_eq!(string_aggregator.sparse_precision(), 20);
1331    }
1332
1333    #[test]
1334    fn test_result() {
1335        let mut aggregator = HyperLogLogPlusPlus::builder()
1336            .build_for_u64()
1337            .expect("Build failed");
1338        for i in 0..=2188 {
1339            aggregator.add_u64(i).unwrap();
1340            aggregator.result().unwrap();
1341        }
1342        println!("result (2188): {}", aggregator.result().unwrap());
1343        aggregator.add_u64(2189).unwrap();
1344        println!("result (2189): {}", aggregator.result().unwrap());
1345    }
1346}