splinter_rs/
splinter.rs

1use std::{fmt::Debug, ops::RangeBounds};
2
3use bytes::Bytes;
4
5use crate::{
6    Encodable, Optimizable, SplinterRef,
7    codec::{encoder::Encoder, footer::Footer},
8    level::High,
9    partition::Partition,
10    traits::{PartitionRead, PartitionWrite},
11    util::RangeExt,
12};
13
14/// A compressed bitmap optimized for small, sparse sets of 32-bit unsigned integers.
15///
16/// `Splinter` is the main owned data structure that can be built incrementally by inserting
17/// values and then optimized for size and query performance. It uses a 256-way tree structure
18/// by decomposing integers into big-endian component bytes, with nodes optimized into four
19/// different storage classes: tree, vec, bitmap, and run.
20///
21/// For zero-copy querying of serialized data, see [`SplinterRef`].
22/// For a clone-on-write wrapper, see [`crate::CowSplinter`].
23///
24/// # Examples
25///
26/// Basic usage:
27///
28/// ```
29/// use splinter_rs::{Splinter, PartitionWrite, PartitionRead, Optimizable};
30///
31/// let mut splinter = Splinter::from_iter([1024, 2048, 123]);
32///
33/// // Check membership
34/// assert!(splinter.contains(1024));
35/// assert!(!splinter.contains(999));
36///
37/// // Get cardinality
38/// assert_eq!(splinter.cardinality(), 3);
39///
40/// // Optimize for better compression, recommended before encoding to bytes.
41/// splinter.optimize();
42/// ```
43///
44/// Building from iterator:
45///
46/// ```
47/// use splinter_rs::{Splinter, PartitionRead};
48///
49/// let values = vec![100, 200, 300, 400];
50/// let splinter: Splinter = values.into_iter().collect();
51///
52/// assert_eq!(splinter.cardinality(), 4);
53/// assert!(splinter.contains(200));
54/// ```
55#[derive(Clone, PartialEq, Eq, Default, Debug)]
56pub struct Splinter(Partition<High>);
57
58static_assertions::const_assert_eq!(std::mem::size_of::<Splinter>(), 40);
59
60impl Splinter {
61    /// An empty Splinter, suitable for usage in a const context.
62    pub const EMPTY: Self = Splinter(Partition::EMPTY);
63
64    /// A full Splinter, suitable for usage in a const context.
65    pub const FULL: Self = Splinter(Partition::Full);
66
67    /// Encodes this splinter into a [`SplinterRef`] for zero-copy querying.
68    ///
69    /// This method serializes the splinter data and returns a [`SplinterRef<Bytes>`]
70    /// that can be used for efficient read-only operations without deserializing
71    /// the underlying data structure.
72    ///
73    /// # Examples
74    ///
75    /// ```
76    /// use splinter_rs::{Splinter, PartitionWrite, PartitionRead};
77    ///
78    /// let mut splinter = Splinter::from_iter([42, 1337]);
79    ///
80    /// let splinter_ref = splinter.encode_to_splinter_ref();
81    /// assert_eq!(splinter_ref.cardinality(), 2);
82    /// assert!(splinter_ref.contains(42));
83    /// ```
84    pub fn encode_to_splinter_ref(&self) -> SplinterRef<Bytes> {
85        SplinterRef { data: self.encode_to_bytes() }
86    }
87
88    #[inline(always)]
89    pub(crate) fn new(inner: Partition<High>) -> Self {
90        Self(inner)
91    }
92
93    #[inline(always)]
94    pub(crate) fn inner(&self) -> &Partition<High> {
95        &self.0
96    }
97
98    #[inline(always)]
99    pub(crate) fn inner_mut(&mut self) -> &mut Partition<High> {
100        &mut self.0
101    }
102}
103
104impl FromIterator<u32> for Splinter {
105    fn from_iter<I: IntoIterator<Item = u32>>(iter: I) -> Self {
106        Self(Partition::<High>::from_iter(iter))
107    }
108}
109
110impl<R: RangeBounds<u32>> From<R> for Splinter {
111    fn from(range: R) -> Self {
112        if let Some(range) = range.try_into_inclusive() {
113            if range.start() == &u32::MIN && range.end() == &u32::MAX {
114                Self::FULL
115            } else {
116                Self(Partition::<High>::from(range))
117            }
118        } else {
119            // range is empty
120            Self::EMPTY
121        }
122    }
123}
124
125impl PartitionRead<High> for Splinter {
126    /// Returns the total number of elements in this splinter.
127    ///
128    /// # Examples
129    ///
130    /// ```
131    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
132    ///
133    /// let mut splinter = Splinter::EMPTY;
134    /// assert_eq!(splinter.cardinality(), 0);
135    ///
136    /// let splinter = Splinter::from_iter([100, 200, 300]);
137    /// assert_eq!(splinter.cardinality(), 3);
138    /// ```
139    #[inline]
140    fn cardinality(&self) -> usize {
141        self.0.cardinality()
142    }
143
144    /// Returns `true` if this splinter contains no elements.
145    ///
146    /// # Examples
147    ///
148    /// ```
149    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
150    ///
151    /// let mut splinter = Splinter::EMPTY;
152    /// assert!(splinter.is_empty());
153    ///
154    /// let splinter = Splinter::from_iter([42]);
155    /// assert!(!splinter.is_empty());
156    /// ```
157    #[inline]
158    fn is_empty(&self) -> bool {
159        self.0.is_empty()
160    }
161
162    /// Returns `true` if this splinter contains the specified value.
163    ///
164    /// # Examples
165    ///
166    /// ```
167    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
168    ///
169    /// let splinter = Splinter::from_iter([42, 1337]);
170    ///
171    /// assert!(splinter.contains(42));
172    /// assert!(splinter.contains(1337));
173    /// assert!(!splinter.contains(999));
174    /// ```
175    #[inline]
176    fn contains(&self, value: u32) -> bool {
177        self.0.contains(value)
178    }
179
180    /// Returns the 0-based position of the value in this splinter if it exists.
181    ///
182    /// This method searches for the given value in the splinter and returns its position
183    /// in the sorted sequence of all elements. If the value doesn't exist, returns `None`.
184    ///
185    /// # Examples
186    ///
187    /// ```
188    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
189    ///
190    /// let splinter = Splinter::from_iter([10, 20, 30]);
191    ///
192    /// assert_eq!(splinter.position(10), Some(0));
193    /// assert_eq!(splinter.position(20), Some(1));
194    /// assert_eq!(splinter.position(30), Some(2));
195    /// assert_eq!(splinter.position(25), None); // doesn't exist
196    /// ```
197    #[inline]
198    fn position(&self, value: u32) -> Option<usize> {
199        self.0.position(value)
200    }
201
202    /// Returns the number of elements in this splinter that are less than or equal to the given value.
203    ///
204    /// This is also known as the "rank" of the value in the sorted sequence of all elements.
205    ///
206    /// # Examples
207    ///
208    /// ```
209    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
210    ///
211    /// let splinter = Splinter::from_iter([10, 20, 30]);
212    ///
213    /// assert_eq!(splinter.rank(5), 0);   // No elements <= 5
214    /// assert_eq!(splinter.rank(10), 1);  // One element <= 10
215    /// assert_eq!(splinter.rank(25), 2);  // Two elements <= 25
216    /// assert_eq!(splinter.rank(30), 3);  // Three elements <= 30
217    /// assert_eq!(splinter.rank(50), 3);  // Three elements <= 50
218    /// ```
219    #[inline]
220    fn rank(&self, value: u32) -> usize {
221        self.0.rank(value)
222    }
223
224    /// Returns the element at the given index in the sorted sequence, or `None` if the index is out of bounds.
225    ///
226    /// The index is 0-based, so `select(0)` returns the smallest element.
227    ///
228    /// # Examples
229    ///
230    /// ```
231    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
232    ///
233    /// let splinter = Splinter::from_iter([100, 50, 200]);
234    ///
235    /// assert_eq!(splinter.select(0), Some(50));   // Smallest element
236    /// assert_eq!(splinter.select(1), Some(100));  // Second smallest
237    /// assert_eq!(splinter.select(2), Some(200));  // Largest element
238    /// assert_eq!(splinter.select(3), None);       // Out of bounds
239    /// ```
240    #[inline]
241    fn select(&self, idx: usize) -> Option<u32> {
242        self.0.select(idx)
243    }
244
245    /// Returns the largest element in this splinter, or `None` if it's empty.
246    ///
247    /// # Examples
248    ///
249    /// ```
250    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
251    ///
252    /// let mut splinter = Splinter::EMPTY;
253    /// assert_eq!(splinter.last(), None);
254    ///
255    /// let splinter = Splinter::from_iter([100, 50, 200]);
256    ///
257    /// assert_eq!(splinter.last(), Some(200));
258    /// ```
259    #[inline]
260    fn last(&self) -> Option<u32> {
261        self.0.last()
262    }
263
264    /// Returns an iterator over all elements in ascending order.
265    ///
266    /// # Examples
267    ///
268    /// ```
269    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
270    ///
271    /// let splinter = Splinter::from_iter([300, 100, 200]);
272    ///
273    /// let values: Vec<u32> = splinter.iter().collect();
274    /// assert_eq!(values, vec![100, 200, 300]);
275    /// ```
276    #[inline]
277    fn iter(&self) -> impl Iterator<Item = u32> {
278        self.0.iter()
279    }
280
281    /// Returns `true` if this splinter contains all values in the specified range.
282    ///
283    /// This method checks whether every value within the given range bounds is present
284    /// in the splinter. An empty range is trivially contained and returns `true`.
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// use splinter_rs::{Splinter, PartitionRead};
290    ///
291    /// let splinter = Splinter::from_iter([10, 11, 12, 13, 14, 15, 100]);
292    ///
293    /// // Check if range is fully contained
294    /// assert!(splinter.contains_all(10..=15));
295    /// assert!(splinter.contains_all(11..=14));
296    ///
297    /// // Missing values mean the range is not fully contained
298    /// assert!(!splinter.contains_all(10..=16));  // 16 is missing
299    /// assert!(!splinter.contains_all(9..=15));   // 9 is missing
300    ///
301    /// // Empty ranges are trivially contained
302    /// assert!(splinter.contains_all(50..50));
303    /// ```
304    #[inline]
305    fn contains_all<R: RangeBounds<u32>>(&self, values: R) -> bool {
306        self.0.contains_all(values)
307    }
308
309    /// Returns `true` if this splinter has a non-empty intersection with the specified range.
310    ///
311    /// This method checks whether any value within the given range is present
312    /// in the splinter. Returns `false` for empty ranges.
313    ///
314    /// # Examples
315    ///
316    /// ```
317    /// use splinter_rs::{Splinter, PartitionRead};
318    ///
319    /// let splinter = Splinter::from_iter([10, 20, 30]);
320    ///
321    /// // Check for any overlap
322    /// assert!(splinter.contains_any(10..=15));   // Contains 10
323    /// assert!(splinter.contains_any(5..=10));    // Contains 10
324    /// assert!(splinter.contains_any(25..=35));   // Contains 30
325    ///
326    /// // No overlap
327    /// assert!(!splinter.contains_any(0..=9));    // No values in range
328    /// assert!(!splinter.contains_any(40..=50));  // No values in range
329    ///
330    /// // Empty ranges have no intersection
331    /// assert!(!splinter.contains_any(50..50));
332    /// ```
333    #[inline]
334    fn contains_any<R: RangeBounds<u32>>(&self, values: R) -> bool {
335        self.0.contains_any(values)
336    }
337}
338
339impl PartitionWrite<High> for Splinter {
340    /// Inserts a value into this splinter.
341    ///
342    /// Returns `true` if the value was newly inserted, or `false` if it was already present.
343    ///
344    /// # Examples
345    ///
346    /// ```
347    /// use splinter_rs::{Splinter, PartitionWrite, PartitionRead};
348    ///
349    /// let mut splinter = Splinter::EMPTY;
350    ///
351    /// // First insertion returns true
352    /// assert!(splinter.insert(42));
353    /// assert_eq!(splinter.cardinality(), 1);
354    ///
355    /// // Second insertion of same value returns false
356    /// assert!(!splinter.insert(42));
357    /// assert_eq!(splinter.cardinality(), 1);
358    ///
359    /// // Different value returns true
360    /// assert!(splinter.insert(100));
361    /// assert_eq!(splinter.cardinality(), 2);
362    /// ```
363    #[inline]
364    fn insert(&mut self, value: u32) -> bool {
365        self.0.insert(value)
366    }
367
368    /// Removes a value from this splinter.
369    ///
370    /// Returns `true` if the value was present and removed, or `false` if it was not present.
371    ///
372    /// # Examples
373    ///
374    /// ```
375    /// use splinter_rs::{Splinter, PartitionWrite, PartitionRead};
376    ///
377    /// let mut splinter = Splinter::from_iter([42, 100]);
378    /// assert_eq!(splinter.cardinality(), 2);
379    ///
380    /// // Remove existing value
381    /// assert!(splinter.remove(42));
382    /// assert_eq!(splinter.cardinality(), 1);
383    /// assert!(!splinter.contains(42));
384    /// assert!(splinter.contains(100));
385    ///
386    /// // Remove non-existent value
387    /// assert!(!splinter.remove(999));
388    /// assert_eq!(splinter.cardinality(), 1);
389    /// ```
390    #[inline]
391    fn remove(&mut self, value: u32) -> bool {
392        self.0.remove(value)
393    }
394
395    /// Removes a range of values from this splinter.
396    ///
397    /// This method removes all values that fall within the specified range bounds.
398    /// The range can be inclusive, exclusive, or half-bounded using standard Rust range syntax.
399    ///
400    /// # Examples
401    ///
402    /// ```
403    /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
404    ///
405    /// let mut splinter = Splinter::from_iter(1..=10);
406    ///
407    /// // Remove values 3 through 7 (inclusive)
408    /// splinter.remove_range(3..=7);
409    /// assert!(!splinter.contains(5));
410    /// assert!(splinter.contains(2));
411    /// assert!(splinter.contains(8));
412    ///
413    /// // Remove from 9 onwards
414    /// splinter.remove_range(9..);
415    /// assert!(!splinter.contains(9));
416    /// assert!(!splinter.contains(10));
417    /// assert!(splinter.contains(8));
418    /// ```
419    #[inline]
420    fn remove_range<R: RangeBounds<u32>>(&mut self, values: R) {
421        self.0.remove_range(values);
422    }
423}
424
425impl Encodable for Splinter {
426    fn encoded_size(&self) -> usize {
427        self.0.encoded_size() + std::mem::size_of::<Footer>()
428    }
429
430    fn encode<B: bytes::BufMut>(&self, encoder: &mut Encoder<B>) {
431        self.0.encode(encoder);
432        encoder.write_footer();
433    }
434}
435
436impl Optimizable for Splinter {
437    #[inline]
438    fn optimize(&mut self) {
439        self.0.optimize();
440    }
441}
442
443impl Extend<u32> for Splinter {
444    #[inline]
445    fn extend<T: IntoIterator<Item = u32>>(&mut self, iter: T) {
446        self.0.extend(iter);
447    }
448}
449
450#[cfg(test)]
451mod tests {
452    use std::ops::Bound;
453
454    use super::*;
455    use crate::{
456        codec::Encodable,
457        level::{Level, Low},
458        testutil::{SetGen, mksplinter, ratio_to_marks, test_partition_read, test_partition_write},
459        traits::Optimizable,
460    };
461    use itertools::{Itertools, assert_equal};
462    use proptest::{
463        collection::{hash_set, vec},
464        proptest,
465    };
466    use rand::{SeedableRng, seq::index};
467    use roaring::RoaringBitmap;
468
469    #[test]
470    fn test_sanity() {
471        let mut splinter = Splinter::EMPTY;
472
473        assert!(splinter.insert(1));
474        assert!(!splinter.insert(1));
475        assert!(splinter.contains(1));
476
477        let values = [1024, 123, 16384];
478        for v in values {
479            assert!(splinter.insert(v));
480            assert!(splinter.contains(v));
481            assert!(!splinter.contains(v + 1));
482        }
483
484        for i in 0..8192 + 10 {
485            splinter.insert(i);
486        }
487
488        splinter.optimize();
489
490        dbg!(&splinter);
491
492        let expected = splinter.iter().collect_vec();
493        test_partition_read(&splinter, &expected);
494        test_partition_write(&mut splinter);
495    }
496
497    #[test]
498    fn test_wat() {
499        let mut set_gen = SetGen::new(0xDEAD_BEEF);
500        let set = set_gen.random_max(64, 4096);
501        let baseline_size = set.len() * 4;
502
503        let mut splinter = Splinter::from_iter(set.iter().copied());
504        splinter.optimize();
505
506        dbg!(&splinter, splinter.encoded_size(), baseline_size, set.len());
507        itertools::assert_equal(splinter.iter(), set.into_iter());
508    }
509
510    #[test]
511    fn test_splinter_write() {
512        let mut splinter = Splinter::from_iter(0u32..16384);
513        test_partition_write(&mut splinter);
514    }
515
516    #[test]
517    fn test_splinter_optimize_growth() {
518        let mut splinter = Splinter::EMPTY;
519        let mut rng = rand::rngs::StdRng::seed_from_u64(0xdeadbeef);
520        let set = index::sample(&mut rng, Low::MAX_LEN, 8);
521        dbg!(&splinter);
522        for i in set {
523            splinter.insert(i as u32);
524            dbg!(&splinter);
525        }
526    }
527
528    #[test]
529    fn test_splinter_from_range() {
530        let splinter = Splinter::from(..);
531        assert_eq!(splinter.cardinality(), (u32::MAX as usize) + 1);
532
533        let mut splinter = Splinter::from(1..);
534        assert_eq!(splinter.cardinality(), u32::MAX as usize);
535
536        splinter.remove(1024);
537        assert_eq!(splinter.cardinality(), (u32::MAX as usize) - 1);
538
539        let mut count = 1;
540        for i in (2048..=256000).step_by(1024) {
541            splinter.remove(i);
542            count += 1
543        }
544        assert_eq!(splinter.cardinality(), (u32::MAX as usize) - count);
545    }
546
547    proptest! {
548        #[test]
549        fn test_splinter_read_proptest(set in hash_set(0u32..16384, 0..1024)) {
550            let expected = set.iter().copied().sorted().collect_vec();
551            test_partition_read(&Splinter::from_iter(set), &expected);
552        }
553
554
555        #[test]
556        fn test_splinter_proptest(set in vec(0u32..16384, 0..1024)) {
557            let splinter = mksplinter(&set);
558            if set.is_empty() {
559                assert!(!splinter.contains(123));
560            } else {
561                let lookup = set[set.len() / 3];
562                assert!(splinter.contains(lookup));
563            }
564        }
565
566        #[test]
567        fn test_splinter_opt_proptest(set in vec(0u32..16384, 0..1024))  {
568            let mut splinter = mksplinter(&set);
569            splinter.optimize();
570            if set.is_empty() {
571                assert!(!splinter.contains(123));
572            } else {
573                let lookup = set[set.len() / 3];
574                assert!(splinter.contains(lookup));
575            }
576        }
577
578        #[test]
579        fn test_splinter_eq_proptest(set in vec(0u32..16384, 0..1024)) {
580            let a = mksplinter(&set);
581            assert_eq!(a, a.clone());
582        }
583
584        #[test]
585        fn test_splinter_opt_eq_proptest(set in vec(0u32..16384, 0..1024)) {
586            let mut a = mksplinter(&set);
587            let b = mksplinter(&set);
588            a.optimize();
589            assert_eq!(a, b);
590        }
591
592        #[test]
593        fn test_splinter_remove_range_proptest(set in hash_set(0u32..16384, 0..1024)) {
594            let expected = set.iter().copied().sorted().collect_vec();
595            let mut splinter = mksplinter(&expected);
596            if let Some(last) = expected.last() {
597                splinter.remove_range((Bound::Excluded(last), Bound::Unbounded));
598                assert_equal(splinter.iter(), expected);
599            }
600        }
601    }
602
603    #[test]
604    fn test_expected_compression() {
605        fn to_roaring(set: impl Iterator<Item = u32>) -> Vec<u8> {
606            let mut buf = std::io::Cursor::new(Vec::new());
607            let mut bmp = RoaringBitmap::from_sorted_iter(set).unwrap();
608            bmp.optimize();
609            bmp.serialize_into(&mut buf).unwrap();
610            buf.into_inner()
611        }
612
613        struct Report {
614            name: String,
615            baseline: usize,
616            //        (actual, expected)
617            splinter: (usize, usize),
618            roaring: (usize, usize),
619
620            splinter_lz4: usize,
621            roaring_lz4: usize,
622        }
623
624        let mut reports = vec![];
625
626        let mut run_test = |name: &str,
627                            set: Vec<u32>,
628                            expected_set_size: usize,
629                            expected_splinter: usize,
630                            expected_roaring: usize| {
631            assert_eq!(set.len(), expected_set_size, "Set size mismatch");
632
633            let mut splinter = Splinter::from_iter(set.clone());
634            splinter.optimize();
635            itertools::assert_equal(splinter.iter(), set.iter().copied());
636
637            test_partition_read(&splinter, &set);
638
639            let expected_size = splinter.encoded_size();
640            let splinter = splinter.encode_to_bytes();
641
642            assert_eq!(
643                splinter.len(),
644                expected_size,
645                "actual encoded size does not match declared encoded size"
646            );
647
648            let roaring = to_roaring(set.iter().copied());
649
650            let splinter_lz4 = lz4::block::compress(&splinter, None, false).unwrap();
651            let roaring_lz4 = lz4::block::compress(&roaring, None, false).unwrap();
652
653            // verify round trip
654            assert_eq!(
655                splinter,
656                lz4::block::decompress(&splinter_lz4, Some(splinter.len() as i32)).unwrap()
657            );
658            assert_eq!(
659                roaring,
660                lz4::block::decompress(&roaring_lz4, Some(roaring.len() as i32)).unwrap()
661            );
662
663            reports.push(Report {
664                name: name.to_owned(),
665                baseline: set.len() * std::mem::size_of::<u32>(),
666                splinter: (splinter.len(), expected_splinter),
667                roaring: (roaring.len(), expected_roaring),
668
669                splinter_lz4: splinter_lz4.len(),
670                roaring_lz4: roaring_lz4.len(),
671            });
672        };
673
674        let mut set_gen = SetGen::new(0xDEAD_BEEF);
675
676        // empty splinter
677        run_test("empty", vec![], 0, 13, 8);
678
679        // 1 element in set
680        let set = set_gen.distributed(1, 1, 1, 1);
681        run_test("1 element", set, 1, 21, 18);
682
683        // 1 fully dense block
684        let set = set_gen.distributed(1, 1, 1, 256);
685        run_test("1 dense block", set, 256, 25, 15);
686
687        // 1 half full block
688        let set = set_gen.distributed(1, 1, 1, 128);
689        run_test("1 half full block", set, 128, 63, 255);
690
691        // 1 sparse block
692        let set = set_gen.distributed(1, 1, 1, 16);
693        run_test("1 sparse block", set, 16, 48, 48);
694
695        // 8 half full blocks
696        let set = set_gen.distributed(1, 1, 8, 128);
697        run_test("8 half full blocks", set, 1024, 315, 2003);
698
699        // 8 sparse blocks
700        let set = set_gen.distributed(1, 1, 8, 2);
701        run_test("8 sparse blocks", set, 16, 60, 48);
702
703        // 64 half full blocks
704        let set = set_gen.distributed(4, 4, 4, 128);
705        run_test("64 half full blocks", set, 8192, 2442, 16452);
706
707        // 64 sparse blocks
708        let set = set_gen.distributed(4, 4, 4, 2);
709        run_test("64 sparse blocks", set, 128, 410, 392);
710
711        // 256 half full blocks
712        let set = set_gen.distributed(4, 8, 8, 128);
713        run_test("256 half full blocks", set, 32768, 9450, 65580);
714
715        // 256 sparse blocks
716        let set = set_gen.distributed(4, 8, 8, 2);
717        run_test("256 sparse blocks", set, 512, 1290, 1288);
718
719        // 512 half full blocks
720        let set = set_gen.distributed(8, 8, 8, 128);
721        run_test("512 half full blocks", set, 65536, 18886, 130810);
722
723        // 512 sparse blocks
724        let set = set_gen.distributed(8, 8, 8, 2);
725        run_test("512 sparse blocks", set, 1024, 2566, 2568);
726
727        // the rest of the compression tests use 4k elements
728        let elements = 4096;
729
730        // fully dense splinter
731        let set = set_gen.distributed(1, 1, 16, 256);
732        run_test("fully dense", set, elements, 80, 63);
733
734        // 128 elements per block; dense partitions
735        let set = set_gen.distributed(1, 1, 32, 128);
736        run_test("128/block; dense", set, elements, 1179, 8208);
737
738        // 32 elements per block; dense partitions
739        let set = set_gen.distributed(1, 1, 128, 32);
740        run_test("32/block; dense", set, elements, 4539, 8208);
741
742        // 16 element per block; dense low partitions
743        let set = set_gen.distributed(1, 1, 256, 16);
744        run_test("16/block; dense", set, elements, 5147, 8208);
745
746        // 128 elements per block; sparse mid partitions
747        let set = set_gen.distributed(1, 32, 1, 128);
748        run_test("128/block; sparse mid", set, elements, 1365, 8282);
749
750        // 128 elements per block; sparse high partitions
751        let set = set_gen.distributed(32, 1, 1, 128);
752        run_test("128/block; sparse high", set, elements, 1582, 8224);
753
754        // 1 element per block; sparse mid partitions
755        let set = set_gen.distributed(1, 256, 16, 1);
756        run_test("1/block; sparse mid", set, elements, 9749, 10248);
757
758        // 1 element per block; sparse high partitions
759        let set = set_gen.distributed(256, 16, 1, 1);
760        run_test("1/block; sparse high", set, elements, 14350, 40968);
761
762        // 1/block; spread low
763        let set = set_gen.dense(1, 16, 256, 1);
764        run_test("1/block; spread low", set, elements, 8325, 8328);
765
766        // each partition is dense
767        let set = set_gen.dense(8, 8, 8, 8);
768        run_test("dense throughout", set, elements, 4113, 2700);
769
770        // the lowest partitions are dense
771        let set = set_gen.dense(1, 1, 64, 64);
772        run_test("dense low", set, elements, 529, 267);
773
774        // the mid and low partitions are dense
775        let set = set_gen.dense(1, 32, 16, 8);
776        run_test("dense mid/low", set, elements, 4113, 2376);
777
778        let random_cases = [
779            // random sets drawing from the enire u32 range
780            (32, High::MAX_LEN, 145, 328),
781            (256, High::MAX_LEN, 1041, 2544),
782            (1024, High::MAX_LEN, 4113, 10168),
783            (4096, High::MAX_LEN, 14350, 40056),
784            (16384, High::MAX_LEN, 51214, 148656),
785            (65536, High::MAX_LEN, 198670, 461288),
786            // random sets with values < 65536
787            (32, 65536, 92, 80),
788            (256, 65536, 540, 528),
789            (1024, 65536, 2071, 2064),
790            (4096, 65536, 5147, 8208),
791            (65536, 65536, 25, 15),
792            // small sets with values < 1024
793            (8, 1024, 44, 32),
794            (16, 1024, 60, 48),
795            (32, 1024, 79, 80),
796            (64, 1024, 111, 144),
797            (128, 1024, 168, 272),
798        ];
799
800        for (count, max, expected_splinter, expected_roaring) in random_cases {
801            let name = if max == High::MAX_LEN {
802                format!("random/{count}")
803            } else {
804                format!("random/{count}/{max}")
805            };
806            run_test(
807                &name,
808                set_gen.random_max(count, max),
809                count,
810                expected_splinter,
811                expected_roaring,
812            );
813        }
814
815        let mut fail_test = false;
816
817        println!("{}", "-".repeat(83));
818        println!(
819            "{:30} {:12} {:>6} {:>10} {:>10} {:>10}",
820            "test", "bitmap", "size", "expected", "relative", "ok"
821        );
822        for report in &reports {
823            println!(
824                "{:30} {:12} {:6} {:10} {:>10} {:>10}",
825                report.name,
826                "Splinter",
827                report.splinter.0,
828                report.splinter.1,
829                "1.00",
830                if report.splinter.0 == report.splinter.1 {
831                    "ok"
832                } else {
833                    fail_test = true;
834                    "FAIL"
835                }
836            );
837
838            let diff = report.roaring.0 as f64 / report.splinter.0 as f64;
839            let ok_status = if report.roaring.0 != report.roaring.1 {
840                fail_test = true;
841                "FAIL".into()
842            } else {
843                ratio_to_marks(diff)
844            };
845            println!(
846                "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
847                "", "Roaring", report.roaring.0, report.roaring.1, diff, ok_status
848            );
849
850            let diff = report.splinter_lz4 as f64 / report.splinter.0 as f64;
851            println!(
852                "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
853                "",
854                "Splinter LZ4",
855                report.splinter_lz4,
856                report.splinter_lz4,
857                diff,
858                ratio_to_marks(diff)
859            );
860
861            let diff = report.roaring_lz4 as f64 / report.splinter_lz4 as f64;
862            println!(
863                "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
864                "",
865                "Roaring LZ4",
866                report.roaring_lz4,
867                report.roaring_lz4,
868                diff,
869                ratio_to_marks(diff)
870            );
871
872            let diff = report.baseline as f64 / report.splinter.0 as f64;
873            println!(
874                "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
875                "",
876                "Baseline",
877                report.baseline,
878                report.baseline,
879                diff,
880                ratio_to_marks(diff)
881            );
882        }
883
884        // calculate average compression ratio (splinter_lz4 / splinter)
885        let avg_ratio = reports
886            .iter()
887            .map(|r| r.splinter_lz4 as f64 / r.splinter.0 as f64)
888            .sum::<f64>()
889            / reports.len() as f64;
890
891        println!("average compression ratio (splinter_lz4 / splinter): {avg_ratio:.2}");
892
893        assert!(!fail_test, "compression test failed");
894    }
895}