Skip to main content

sketches_ddsketch/
encoding.rs

1//! Java-compatible binary encoding/decoding for DDSketch.
2//!
3//! This module implements the binary format used by the Java
4//! `com.datadoghq.sketch.ddsketch.DDSketchWithExactSummaryStatistics` class
5//! from the DataDog/sketches-java library. It enables cross-language
6//! serialization so that sketches produced in Rust can be deserialized
7//! and merged by Java consumers.
8
9use std::fmt;
10
11use crate::config::Config;
12use crate::ddsketch::DDSketch;
13use crate::store::Store;
14
15// ---------------------------------------------------------------------------
16// Flag byte layout
17//
18// Each flag byte packs a 2-bit type ordinal in the low bits and a 6-bit
19// subflag in the upper bits:  (subflag << 2) | type_ordinal
20// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/Flag.java
21// ---------------------------------------------------------------------------
22
/// Kind of section a flag byte introduces.
///
/// The discriminant is the 2-bit ordinal stored in the low bits of the flag
/// byte; the remaining six bits carry a type-specific subflag.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FlagType {
    SketchFeatures = 0,
    PositiveStore = 1,
    IndexMapping = 2,
    NegativeStore = 3,
}

impl FlagType {
    /// Extract the flag type from the two low bits of `b`.
    ///
    /// The upper six bits are ignored. Because the mask leaves only the
    /// values `0..=3` and each of them maps to a variant, the result is
    /// always `Some`; the `Option` is kept so callers can treat the lookup
    /// uniformly with other fallible decoders.
    fn from_byte(b: u8) -> Option<Self> {
        const TYPE_MASK: u8 = 0b11;
        const VARIANTS: [FlagType; 4] = [
            FlagType::SketchFeatures,
            FlagType::PositiveStore,
            FlagType::IndexMapping,
            FlagType::NegativeStore,
        ];

        let ordinal = b & TYPE_MASK;
        VARIANTS
            .iter()
            .copied()
            .find(|variant| *variant as u8 == ordinal)
    }
}
44
45/// Construct a flag byte from a subflag and a type.
46const fn flag(subflag: u8, flag_type: FlagType) -> u8 {
47    (subflag << 2) | (flag_type as u8)
48}
49
// Pre-computed flag bytes for the sketch features we encode/decode.
// Each value is `(subflag << 2) | type_ordinal` as produced by `flag`; the
// resulting byte is noted at the end of each line.

/// Index mapping section; the encoder follows it with two little-endian f64
/// values (gamma, then the index offset).
const FLAG_INDEX_MAPPING_LOG: u8 = flag(0, FlagType::IndexMapping); // 0x02
/// Zero-bucket count; the payload is a var-double.
const FLAG_ZERO_COUNT: u8 = flag(1, FlagType::SketchFeatures); // 0x04
/// Total count summary statistic; the payload is a var-double.
const FLAG_COUNT: u8 = flag(0x28, FlagType::SketchFeatures); // 0xA0
/// Exact sum summary statistic; the payload is a little-endian f64.
const FLAG_SUM: u8 = flag(0x21, FlagType::SketchFeatures); // 0x84
/// Exact minimum summary statistic; the payload is a little-endian f64.
const FLAG_MIN: u8 = flag(0x22, FlagType::SketchFeatures); // 0x88
/// Exact maximum summary statistic; the payload is a little-endian f64.
const FLAG_MAX: u8 = flag(0x23, FlagType::SketchFeatures); // 0x8C
57
/// How the bins of a store section are laid out on the wire.
///
/// The discriminant is the subflag carried in the upper six bits of a store
/// flag byte.
/// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/BinEncodingMode.java
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BinEncodingMode {
    IndexDeltasAndCounts = 1,
    IndexDeltas = 2,
    ContiguousCounts = 3,
}

impl BinEncodingMode {
    /// Map a store subflag to its encoding mode.
    ///
    /// Returns `None` for any value other than 1, 2 or 3.
    fn from_subflag(subflag: u8) -> Option<Self> {
        const MODES: [BinEncodingMode; 3] = [
            BinEncodingMode::IndexDeltasAndCounts,
            BinEncodingMode::IndexDeltas,
            BinEncodingMode::ContiguousCounts,
        ];

        MODES.iter().copied().find(|mode| *mode as u8 == subflag)
    }
}
78
/// Number of bits the var-double bit pattern is rotated left before being
/// split into bytes (and rotated right again when decoding).
const VAR_DOUBLE_ROTATE_DISTANCE: u32 = 6;
/// Maximum number of bytes a variable-length 64-bit value occupies; the
/// var-double encoder emits at most this many bytes.
const MAX_VAR_LEN_64: usize = 9;

/// Bin limit handed to `Store::new` for stores rebuilt by the decoder.
const DEFAULT_MAX_BINS: u32 = 2048;
83
84// ---------------------------------------------------------------------------
85// Error type
86// ---------------------------------------------------------------------------
87
/// Failure raised while decoding the Java-compatible binary format.
#[derive(Debug, Clone)]
pub enum DecodeError {
    /// The input ended before a complete value could be read.
    UnexpectedEof,
    /// A flag byte was not one the decoder accepts; carries the offending byte.
    InvalidFlag(u8),
    /// The payload was structurally readable but semantically invalid; carries
    /// a human-readable explanation.
    InvalidData(String),
}

impl fmt::Display for DecodeError {
    /// Render a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            DecodeError::UnexpectedEof => f.write_str("unexpected end of input"),
            DecodeError::InvalidFlag(byte) => write!(f, "invalid flag byte: 0x{:02X}", byte),
            DecodeError::InvalidData(reason) => write!(f, "invalid data: {}", reason),
        }
    }
}

impl std::error::Error for DecodeError {}
106
107// ---------------------------------------------------------------------------
108// VarEncoding — bit-exact port of Java VarEncodingHelper
109// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/VarEncodingHelper.java
110// ---------------------------------------------------------------------------
111
/// Append `value` to `out` as a little-endian base-128 varint.
///
/// Every byte except the last carries seven payload bits with the high bit
/// set as a continuation marker. At most eight continuation bytes are
/// written; when a ninth byte is needed it stores the remaining eight bits
/// verbatim, so the encoding never exceeds nine bytes.
fn encode_unsigned_var_long(out: &mut Vec<u8>, mut value: u64) {
    const CONTINUATION: u8 = 0x80;
    const MAX_CONTINUATION_BYTES: u32 = 8;

    let mut written = 0;
    while written < MAX_CONTINUATION_BYTES && value >= 0x80 {
        // `as u8` keeps the low eight bits; OR-ing the marker overwrites bit 7.
        out.push((value as u8) | CONTINUATION);
        value >>= 7;
        written += 1;
    }
    out.push(value as u8);
}
120
121fn decode_unsigned_var_long(input: &mut &[u8]) -> Result<u64, DecodeError> {
122    let mut value: u64 = 0;
123    let mut shift: u32 = 0;
124    loop {
125        let next = read_byte(input)?;
126        if next < 0x80 || shift == 56 {
127            return Ok(value | (u64::from(next) << shift));
128        }
129        value |= (u64::from(next) & 0x7F) << shift;
130        shift += 7;
131    }
132}
133
134/// ZigZag encode then var-long encode.
135fn encode_signed_var_long(out: &mut Vec<u8>, value: i64) {
136    let encoded = ((value >> 63) ^ (value << 1)) as u64;
137    encode_unsigned_var_long(out, encoded);
138}
139
140fn decode_signed_var_long(input: &mut &[u8]) -> Result<i64, DecodeError> {
141    let encoded = decode_unsigned_var_long(input)?;
142    Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64))
143}
144
145fn double_to_var_bits(value: f64) -> u64 {
146    let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0));
147    bits.rotate_left(VAR_DOUBLE_ROTATE_DISTANCE)
148}
149
150fn var_bits_to_double(bits: u64) -> f64 {
151    f64::from_bits(
152        bits.rotate_right(VAR_DOUBLE_ROTATE_DISTANCE)
153            .wrapping_add(f64::to_bits(1.0)),
154    ) - 1.0
155}
156
157fn encode_var_double(out: &mut Vec<u8>, value: f64) {
158    let mut bits = double_to_var_bits(value);
159    for _ in 0..MAX_VAR_LEN_64 - 1 {
160        let next = (bits >> 57) as u8;
161        bits <<= 7;
162        if bits == 0 {
163            out.push(next);
164            return;
165        }
166        out.push(next | 0x80);
167    }
168    out.push((bits >> 56) as u8);
169}
170
171fn decode_var_double(input: &mut &[u8]) -> Result<f64, DecodeError> {
172    let mut bits: u64 = 0;
173    let mut shift: i32 = 57; // 8*8 - 7
174    loop {
175        let next = read_byte(input)?;
176        if shift == 1 {
177            bits |= u64::from(next);
178            break;
179        }
180        if next < 0x80 {
181            bits |= u64::from(next) << shift;
182            break;
183        }
184        bits |= (u64::from(next) & 0x7F) << shift;
185        shift -= 7;
186    }
187    Ok(var_bits_to_double(bits))
188}
189
190// ---------------------------------------------------------------------------
191// Byte-level helpers
192// ---------------------------------------------------------------------------
193
194fn read_byte(input: &mut &[u8]) -> Result<u8, DecodeError> {
195    match input.split_first() {
196        Some((&byte, rest)) => {
197            *input = rest;
198            Ok(byte)
199        }
200        None => Err(DecodeError::UnexpectedEof),
201    }
202}
203
/// Append the eight IEEE-754 bytes of `value` to `out`, least significant
/// byte first.
fn write_f64_le(out: &mut Vec<u8>, value: f64) {
    let raw = value.to_bits().to_le_bytes();
    out.extend(raw.iter().copied());
}
207
208fn read_f64_le(input: &mut &[u8]) -> Result<f64, DecodeError> {
209    if input.len() < 8 {
210        return Err(DecodeError::UnexpectedEof);
211    }
212    let (bytes, rest) = input.split_at(8);
213    *input = rest;
214    // bytes is guaranteed to be length 8 by the split_at above.
215    let arr = [
216        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
217    ];
218    Ok(f64::from_le_bytes(arr))
219}
220
221// ---------------------------------------------------------------------------
222// Store encoding/decoding
223// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/store/DenseStore.java  (encode/decode methods)
224// ---------------------------------------------------------------------------
225
226/// Collect non-zero bins in the store as (absolute_index, count) pairs.
227///
228/// Allocation is acceptable here: this runs once per encode and the Vec
229/// has at most `max_num_bins` entries.
230fn collect_non_zero_bins(store: &Store) -> Vec<(i32, u64)> {
231    if store.count == 0 {
232        return Vec::new();
233    }
234    let start = (store.min_key - store.offset) as usize;
235    let end = ((store.max_key - store.offset + 1) as usize).min(store.bins.len());
236    store.bins[start..end]
237        .iter()
238        .enumerate()
239        .filter(|&(_, &count)| count > 0)
240        .map(|(i, &count)| (start as i32 + i as i32 + store.offset, count))
241        .collect()
242}
243
244fn encode_store(out: &mut Vec<u8>, store: &Store, flag_type: FlagType) {
245    let bins = collect_non_zero_bins(store);
246    if bins.is_empty() {
247        return;
248    }
249
250    out.push(flag(BinEncodingMode::IndexDeltasAndCounts as u8, flag_type));
251    encode_unsigned_var_long(out, bins.len() as u64);
252
253    let mut prev_index: i64 = 0;
254    for &(index, count) in &bins {
255        encode_signed_var_long(out, i64::from(index) - prev_index);
256        encode_var_double(out, count as f64);
257        prev_index = i64::from(index);
258    }
259}
260
261fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Store, DecodeError> {
262    let mode = BinEncodingMode::from_subflag(subflag).ok_or_else(|| {
263        DecodeError::InvalidData(format!("unknown bin encoding mode subflag: {subflag}"))
264    })?;
265    let num_bins = decode_unsigned_var_long(input)? as usize;
266    let mut store = Store::new(bin_limit);
267
268    match mode {
269        BinEncodingMode::IndexDeltasAndCounts => {
270            let mut index: i64 = 0;
271            for _ in 0..num_bins {
272                index += decode_signed_var_long(input)?;
273                let count = decode_var_double(input)?;
274                store.add_count(index as i32, count as u64);
275            }
276        }
277        BinEncodingMode::IndexDeltas => {
278            let mut index: i64 = 0;
279            for _ in 0..num_bins {
280                index += decode_signed_var_long(input)?;
281                store.add_count(index as i32, 1);
282            }
283        }
284        BinEncodingMode::ContiguousCounts => {
285            let start_index = decode_signed_var_long(input)?;
286            let index_delta = decode_signed_var_long(input)?;
287            let mut index = start_index;
288            for _ in 0..num_bins {
289                let count = decode_var_double(input)?;
290                store.add_count(index as i32, count as u64);
291                index += index_delta;
292            }
293        }
294    }
295
296    Ok(store)
297}
298
299// ---------------------------------------------------------------------------
300// Top-level encode / decode
301// ---------------------------------------------------------------------------
302
303/// Encode a DDSketch into the Java-compatible binary format.
304///
305/// The output follows the encoding order of
306/// `DDSketchWithExactSummaryStatistics.encode()` then `DDSketch.encode()`:
307///
308/// 1. Summary statistics: COUNT, MIN, MAX (if count > 0)
309/// 2. SUM (if sum != 0)
310/// 3. Index mapping (LOG layout): gamma, indexOffset
311/// 4. Zero count (if > 0)
312/// 5. Positive store bins
313/// 6. Negative store bins
314pub fn encode_to_java_bytes(sketch: &DDSketch) -> Vec<u8> {
315    let mut out = Vec::new();
316    let count = sketch.count() as f64;
317
318    // Summary statistics (DDSketchWithExactSummaryStatistics.encode)
319    if count != 0.0 {
320        out.push(FLAG_COUNT);
321        encode_var_double(&mut out, count);
322        out.push(FLAG_MIN);
323        write_f64_le(&mut out, sketch.min);
324        out.push(FLAG_MAX);
325        write_f64_le(&mut out, sketch.max);
326    }
327    if sketch.sum != 0.0 {
328        out.push(FLAG_SUM);
329        write_f64_le(&mut out, sketch.sum);
330    }
331
332    // DDSketch.encode: index mapping + zero count + stores
333    out.push(FLAG_INDEX_MAPPING_LOG);
334    write_f64_le(&mut out, sketch.config.gamma);
335    write_f64_le(&mut out, 0.0_f64);
336
337    if sketch.zero_count != 0 {
338        out.push(FLAG_ZERO_COUNT);
339        encode_var_double(&mut out, sketch.zero_count as f64);
340    }
341
342    encode_store(&mut out, &sketch.store, FlagType::PositiveStore);
343    encode_store(&mut out, &sketch.negative_store, FlagType::NegativeStore);
344
345    out
346}
347
348/// Decode a DDSketch from the Java-compatible binary format.
349///
350/// Accepts bytes with or without a `0x02` version prefix.
351pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
352    if bytes.is_empty() {
353        return Err(DecodeError::UnexpectedEof);
354    }
355
356    let mut input = bytes;
357
358    // Skip optional version prefix (0x02 followed by a valid flag byte).
359    if input.len() >= 2 && input[0] == 0x02 && is_valid_flag_byte(input[1]) {
360        input = &input[1..];
361    }
362
363    let mut gamma: Option<f64> = None;
364    let mut zero_count: f64 = 0.0;
365    let mut sum: f64 = 0.0;
366    let mut min: f64 = f64::INFINITY;
367    let mut max: f64 = f64::NEG_INFINITY;
368    let mut positive_store: Option<Store> = None;
369    let mut negative_store: Option<Store> = None;
370
371    while !input.is_empty() {
372        let flag_byte = read_byte(&mut input)?;
373        let flag_type =
374            FlagType::from_byte(flag_byte).ok_or(DecodeError::InvalidFlag(flag_byte))?;
375        let subflag = flag_byte >> 2;
376
377        match flag_type {
378            FlagType::IndexMapping => {
379                gamma = Some(read_f64_le(&mut input)?);
380                let _index_offset = read_f64_le(&mut input)?;
381            }
382            FlagType::SketchFeatures => match flag_byte {
383                FLAG_ZERO_COUNT => zero_count += decode_var_double(&mut input)?,
384                FLAG_COUNT => {
385                    let _count = decode_var_double(&mut input)?;
386                }
387                FLAG_SUM => sum = read_f64_le(&mut input)?,
388                FLAG_MIN => min = read_f64_le(&mut input)?,
389                FLAG_MAX => max = read_f64_le(&mut input)?,
390                _ => return Err(DecodeError::InvalidFlag(flag_byte)),
391            },
392            FlagType::PositiveStore => {
393                positive_store = Some(decode_store(
394                    &mut input,
395                    subflag,
396                    DEFAULT_MAX_BINS as usize,
397                )?);
398            }
399            FlagType::NegativeStore => {
400                negative_store = Some(decode_store(
401                    &mut input,
402                    subflag,
403                    DEFAULT_MAX_BINS as usize,
404                )?);
405            }
406        }
407    }
408
409    let g = gamma.unwrap_or_else(|| Config::defaults().gamma);
410    let config = Config::from_gamma(g);
411    let store = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
412    let neg = negative_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
413
414    Ok(DDSketch {
415        config,
416        store,
417        negative_store: neg,
418        min,
419        max,
420        sum,
421        zero_count: zero_count as u64,
422    })
423}
424
425/// Check whether a byte is a valid flag byte for the DDSketch binary format.
426fn is_valid_flag_byte(b: u8) -> bool {
427    // Known sketch-feature flags
428    if matches!(
429        b,
430        FLAG_ZERO_COUNT | FLAG_COUNT | FLAG_SUM | FLAG_MIN | FLAG_MAX | FLAG_INDEX_MAPPING_LOG
431    ) {
432        return true;
433    }
434    let Some(flag_type) = FlagType::from_byte(b) else {
435        return false;
436    };
437    let subflag = b >> 2;
438    match flag_type {
439        FlagType::PositiveStore | FlagType::NegativeStore => (1..=3).contains(&subflag),
440        FlagType::IndexMapping => subflag <= 4, // LOG=0, LOG_LINEAR=1 .. LOG_QUARTIC=4
441        _ => false,
442    }
443}
444
445// ---------------------------------------------------------------------------
446// Tests
447// ---------------------------------------------------------------------------
448
/// Unit tests for the var-length primitives, encode/decode round trips
/// through the `DDSketch` API, and byte-for-byte comparison against golden
/// payloads.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Config, DDSketch};

    // --- VarEncoding unit tests ---

    /// Zero encodes to a single `0x00` byte and decoding consumes it entirely.
    #[test]
    fn test_unsigned_var_long_zero() {
        let mut buf = Vec::new();
        encode_unsigned_var_long(&mut buf, 0);
        assert_eq!(buf, [0x00]);

        let mut input = buf.as_slice();
        assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 0);
        assert!(input.is_empty());
    }

    /// Values below 128 fit in one byte with no continuation bit.
    #[test]
    fn test_unsigned_var_long_small() {
        let mut buf = Vec::new();
        encode_unsigned_var_long(&mut buf, 1);
        assert_eq!(buf, [0x01]);

        let mut input = buf.as_slice();
        assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 1);
    }

    /// 128 is the first value that needs a second byte.
    #[test]
    fn test_unsigned_var_long_128() {
        let mut buf = Vec::new();
        encode_unsigned_var_long(&mut buf, 128);
        assert_eq!(buf, [0x80, 0x01]);

        let mut input = buf.as_slice();
        assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 128);
    }

    /// Round trip across byte-length boundaries, including the 9-byte maximum.
    #[test]
    fn test_unsigned_var_long_roundtrip() {
        for v in [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
            let mut buf = Vec::new();
            encode_unsigned_var_long(&mut buf, v);
            let mut input = buf.as_slice();
            let decoded = decode_unsigned_var_long(&mut input).unwrap();
            assert_eq!(decoded, v, "roundtrip failed for {}", v);
            assert!(input.is_empty());
        }
    }

    /// Round trip of signed values around the one-byte ZigZag boundary and at
    /// both extremes of `i64`.
    #[test]
    fn test_signed_var_long_roundtrip() {
        for v in [0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] {
            let mut buf = Vec::new();
            encode_signed_var_long(&mut buf, v);
            let mut input = buf.as_slice();
            let decoded = decode_signed_var_long(&mut input).unwrap();
            assert_eq!(decoded, v, "roundtrip failed for {}", v);
            assert!(input.is_empty());
        }
    }

    /// Var-double round trip; allows a tiny absolute error because the
    /// encoding adds and subtracts 1.0.
    #[test]
    fn test_var_double_roundtrip() {
        for v in [0.0, 1.0, 2.0, 5.0, 15.0, 42.0, 100.0, 1e-9, 1e15, 0.5, 7.77] {
            let mut buf = Vec::new();
            encode_var_double(&mut buf, v);
            let mut input = buf.as_slice();
            let decoded = decode_var_double(&mut input).unwrap();
            assert!(
                (decoded - v).abs() < 1e-15 || decoded == v,
                "roundtrip failed for {}: got {}",
                v,
                decoded,
            );
            assert!(input.is_empty());
        }
    }

    /// Small integer counts occupy a single byte.
    #[test]
    fn test_var_double_small_integers() {
        let mut buf = Vec::new();
        encode_var_double(&mut buf, 1.0);
        assert_eq!(buf.len(), 1, "VarDouble(1.0) should be 1 byte");

        buf.clear();
        encode_var_double(&mut buf, 5.0);
        assert_eq!(buf.len(), 1, "VarDouble(5.0) should be 1 byte");
    }

    // --- DDSketch encode/decode roundtrip tests ---

    /// An empty sketch still encodes to a non-empty payload and decodes back
    /// to an empty sketch.
    #[test]
    fn test_encode_empty_sketch() {
        let sketch = DDSketch::new(Config::defaults());
        let bytes = sketch.to_java_bytes();
        assert!(!bytes.is_empty());

        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();
        assert_eq!(decoded.count(), 0);
        assert_eq!(decoded.min(), None);
        assert_eq!(decoded.max(), None);
        assert_eq!(decoded.sum(), None);
    }

    /// Summary statistics and quantiles survive a round trip.
    #[test]
    fn test_encode_simple_sketch() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [1.0, 2.0, 3.0, 4.0, 5.0] {
            sketch.add(v);
        }

        let bytes = sketch.to_java_bytes();
        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();

        assert_eq!(decoded.count(), 5);
        assert_eq!(decoded.min(), Some(1.0));
        assert_eq!(decoded.max(), Some(5.0));
        assert_eq!(decoded.sum(), Some(15.0));

        assert_quantiles_match(&sketch, &decoded, &[0.5, 0.9, 0.95, 0.99]);
    }

    /// A sketch holding a single value round-trips with min == max == sum.
    #[test]
    fn test_encode_single_value() {
        let mut sketch = DDSketch::new(Config::defaults());
        sketch.add(42.0);

        let bytes = sketch.to_java_bytes();
        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();

        assert_eq!(decoded.count(), 1);
        assert_eq!(decoded.min(), Some(42.0));
        assert_eq!(decoded.max(), Some(42.0));
        assert_eq!(decoded.sum(), Some(42.0));
    }

    /// Mixed-sign input round-trips, including the quantile extremes.
    #[test]
    fn test_encode_negative_values() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [-3.0, -1.0, 2.0, 5.0] {
            sketch.add(v);
        }

        let bytes = sketch.to_java_bytes();
        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();

        assert_eq!(decoded.count(), 4);
        assert_eq!(decoded.min(), Some(-3.0));
        assert_eq!(decoded.max(), Some(5.0));
        assert_eq!(decoded.sum(), Some(3.0));

        assert_quantiles_match(&sketch, &decoded, &[0.0, 0.25, 0.5, 0.75, 1.0]);
    }

    /// A zero value is restored into the decoded sketch's zero count.
    #[test]
    fn test_encode_with_zero_value() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [0.0, 1.0, 2.0] {
            sketch.add(v);
        }

        let bytes = sketch.to_java_bytes();
        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();

        assert_eq!(decoded.count(), 3);
        assert_eq!(decoded.min(), Some(0.0));
        assert_eq!(decoded.max(), Some(2.0));
        assert_eq!(decoded.sum(), Some(3.0));
        assert_eq!(decoded.zero_count, 1);
    }

    /// Values many orders of magnitude apart round-trip.
    #[test]
    fn test_encode_large_range() {
        let mut sketch = DDSketch::new(Config::defaults());
        sketch.add(0.001);
        sketch.add(1_000_000.0);

        let bytes = sketch.to_java_bytes();
        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();

        assert_eq!(decoded.count(), 2);
        assert_eq!(decoded.min(), Some(0.001));
        assert_eq!(decoded.max(), Some(1_000_000.0));
    }

    /// The decoder accepts a payload with a leading `0x02` version byte.
    #[test]
    fn test_encode_with_version_prefix() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [1.0, 2.0, 3.0] {
            sketch.add(v);
        }

        let bytes = sketch.to_java_bytes();

        // Simulate Java's toByteArrayV2: prepend 0x02
        let mut v2_bytes = vec![0x02];
        v2_bytes.extend_from_slice(&bytes);

        let decoded = DDSketch::from_java_bytes(&v2_bytes).unwrap();
        assert_eq!(decoded.count(), 3);
        assert_eq!(decoded.min(), Some(1.0));
        assert_eq!(decoded.max(), Some(3.0));
    }

    /// Spot-check the layout: a non-empty sketch starts with the COUNT flag
    /// and contains the index mapping flag somewhere in the payload.
    #[test]
    fn test_byte_level_encoding() {
        let mut sketch = DDSketch::new(Config::defaults());
        sketch.add(1.0);

        let bytes = sketch.to_java_bytes();

        assert_eq!(bytes[0], FLAG_COUNT, "first byte should be COUNT flag");
        assert!(
            bytes.contains(&FLAG_INDEX_MAPPING_LOG),
            "should contain index mapping flag"
        );
    }

    // --- Cross-language golden byte tests ---
    //
    // Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode()
    // using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048).
    //
    // Each constant is the hex dump of the payload for the input noted on its
    // `test_cross_language_*` test below.

    const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602";
    const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202";
    const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02";
    const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402";
    const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000";
    const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202";

    /// Parse a hex string (two digits per byte, no separators) into bytes.
    /// Panics on non-hex digits or an odd number of digits.
    fn hex_to_bytes(hex: &str) -> Vec<u8> {
        (0..hex.len())
            .step_by(2)
            .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
            .collect()
    }

    /// Render bytes as lowercase hex, two digits per byte.
    fn bytes_to_hex(bytes: &[u8]) -> String {
        bytes.iter().map(|b| format!("{b:02x}")).collect()
    }

    /// Assert that encoding `sketch` yields exactly the bytes in `golden_hex`;
    /// on failure both payloads are printed as hex under `label`.
    fn assert_golden(label: &str, sketch: &DDSketch, golden_hex: &str) {
        let bytes = sketch.to_java_bytes();
        let expected = hex_to_bytes(golden_hex);
        assert_eq!(
            bytes,
            expected,
            "Rust encoding doesn't match Java golden bytes for {}.\nRust: {}\nJava: {}",
            label,
            bytes_to_hex(&bytes),
            golden_hex,
        );
    }

    /// Assert that `a` and `b` agree on every listed quantile to within a
    /// relative error of 1e-12. Panics if either sketch cannot answer.
    fn assert_quantiles_match(a: &DDSketch, b: &DDSketch, quantiles: &[f64]) {
        for &q in quantiles {
            let va = a.quantile(q).unwrap().unwrap();
            let vb = b.quantile(q).unwrap().unwrap();
            assert!(
                // The floor on the denominator avoids dividing by zero.
                (va - vb).abs() / va.abs().max(1e-15) < 1e-12,
                "quantile({}) mismatch: {} vs {}",
                q,
                va,
                vb,
            );
        }
    }

    /// Golden comparison for the values 1.0 through 5.0.
    #[test]
    fn test_cross_language_simple() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [1.0, 2.0, 3.0, 4.0, 5.0] {
            sketch.add(v);
        }
        assert_golden("SIMPLE", &sketch, GOLDEN_SIMPLE);
    }

    /// Golden comparison for the single value 42.0.
    #[test]
    fn test_cross_language_single() {
        let mut sketch = DDSketch::new(Config::defaults());
        sketch.add(42.0);
        assert_golden("SINGLE", &sketch, GOLDEN_SINGLE);
    }

    /// Golden comparison for mixed-sign input.
    #[test]
    fn test_cross_language_negative() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [-3.0, -1.0, 2.0, 5.0] {
            sketch.add(v);
        }
        assert_golden("NEGATIVE", &sketch, GOLDEN_NEGATIVE);
    }

    /// Golden comparison for input that includes a zero.
    #[test]
    fn test_cross_language_zero() {
        let mut sketch = DDSketch::new(Config::defaults());
        for v in [0.0, 1.0, 2.0] {
            sketch.add(v);
        }
        assert_golden("ZERO", &sketch, GOLDEN_ZERO);
    }

    /// Golden comparison for an empty sketch.
    #[test]
    fn test_cross_language_empty() {
        let sketch = DDSketch::new(Config::defaults());
        assert_golden("EMPTY", &sketch, GOLDEN_EMPTY);
    }

    /// Golden comparison for the integers 1 through 100.
    #[test]
    fn test_cross_language_many() {
        let mut sketch = DDSketch::new(Config::defaults());
        for i in 1..=100 {
            sketch.add(i as f64);
        }
        assert_golden("MANY", &sketch, GOLDEN_MANY);
    }

    /// Every golden payload decodes without error.
    #[test]
    fn test_decode_java_golden_bytes() {
        for (name, hex) in [
            ("SIMPLE", GOLDEN_SIMPLE),
            ("SINGLE", GOLDEN_SINGLE),
            ("NEGATIVE", GOLDEN_NEGATIVE),
            ("ZERO", GOLDEN_ZERO),
            ("EMPTY", GOLDEN_EMPTY),
            ("MANY", GOLDEN_MANY),
        ] {
            let bytes = hex_to_bytes(hex);
            let result = DDSketch::from_java_bytes(&bytes);
            assert!(
                result.is_ok(),
                "failed to decode {}: {:?}",
                name,
                result.err()
            );
        }
    }

    /// Round trip of 100 values; the p95 of the decoded sketch must stay
    /// within `alpha` (relative) of the original's.
    #[test]
    fn test_encode_decode_many_values() {
        let mut sketch = DDSketch::new(Config::defaults());
        for i in 1..=100 {
            sketch.add(i as f64);
        }

        let bytes = sketch.to_java_bytes();
        let decoded = DDSketch::from_java_bytes(&bytes).unwrap();

        assert_eq!(decoded.count(), 100);
        assert_eq!(decoded.min(), Some(1.0));
        assert_eq!(decoded.max(), Some(100.0));
        assert_eq!(decoded.sum(), Some(5050.0));

        let alpha = 0.01;
        let orig_p95 = sketch.quantile(0.95).unwrap().unwrap();
        let dec_p95 = decoded.quantile(0.95).unwrap().unwrap();
        assert!(
            (orig_p95 - dec_p95).abs() / orig_p95 < alpha,
            "p95 mismatch: {} vs {}",
            orig_p95,
            dec_p95,
        );
    }
}
809            orig_p95,
810            dec_p95,
811        );
812    }
813}